Weblog

All | CMT | General | NUMA | OpenSolaris | Perl | Photo | Programmers Desk | STREAMS
20050819 Friday August 19, 2005

STREAMS: tr a-z A-Z

STREAMS: tr a-z A-Z

In my earlier entry I discussed the issues with flow control. This time I want to look at a module which actually looks at the data being passed and converts all output to upper case letters. Basically, it will do the same thing as the tr a-z A-Z command does, but using STREAMS.

The core part of the module is the upmod_upcase() function that examines all M_DATA messages in the mblk and converts every symbol to upper case:

/*
 * Minimal ASCII-only character classification helpers.
 *
 * islower(x)/isupper(x) are true when x is in 'a'..'z' / 'A'..'Z'.
 * toupper(x) maps 'a'..'z' to 'A'..'Z' and returns every other value
 * unchanged (digits, punctuation and control characters are left alone).
 *
 * NOTE: these are macros and evaluate their argument more than once, so
 * do not pass expressions with side effects (e.g. *p++).
 */
#define	islower(x)	(((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
#define	isupper(x)	(((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z'))
#define	toupper(x)	(islower(x) ? ((x) - 'a' + 'A') : (x))

/*
 * Upper-case, in place, every ASCII lower-case byte found in the M_DATA
 * blocks of a message.
 *
 * passed_mp - first block of the message; the b_cont chain is followed.
 *
 * Returns passed_mp itself, so the call can be nested inside another call.
 *
 * NOTE(review): the data is rewritten in place and DB_REF() is not
 * consulted -- confirm callers never hand in blocks whose data is shared.
 */
static mblk_t *
upmod_upcase(mblk_t *passed_mp)
{
	mblk_t *blk;

	for (blk = passed_mp; blk != NULL; blk = blk->b_cont) {
		unsigned char *cur;
		unsigned char *end;

		/* Leave non-data blocks and empty blocks untouched. */
		if (DB_TYPE(blk) != M_DATA || MBLKL(blk) <= 0)
			continue;

		end = blk->b_wptr;
		for (cur = blk->b_rptr; cur < end; cur++) {
			if (islower(*cur))
				*cur = toupper(*cur);
		}
	}

	return (passed_mp);
}

The DB_TYPE(mp) macro simply returns the mp->b_datap->db_type value and MBLKL(mp) is the amount of data between the read and write pointers. These macros, together with some other useful definitions, are defined in the sys/strsun.h file:

/* Accessors for fields of the data block (b_datap) behind a message block. */
#define	DB_BASE(mp)	((mp)->b_datap->db_base)
#define	DB_LIM(mp)	((mp)->b_datap->db_lim)
#define	DB_REF(mp)	((mp)->b_datap->db_ref)
#define	DB_TYPE(mp)	((mp)->b_datap->db_type)

/* MBLKL: distance from the read pointer to the write pointer. */
#define	MBLKL(mp)	((mp)->b_wptr - (mp)->b_rptr)
/* MBLKSIZE: distance from db_base to db_lim of the data block. */
#define	MBLKSIZE(mp)	((mp)->b_datap->db_lim - (mp)->b_datap->db_base)
/* MBLKHEAD: distance from db_base to the read pointer. */
#define	MBLKHEAD(mp)	((mp)->b_rptr - (mp)->b_datap->db_base)
/* MBLKTAIL: distance from the write pointer to db_lim. */
#define	MBLKTAIL(mp)	((mp)->b_datap->db_lim - (mp)->b_wptr)
/*
 * MBLKIN: true when off does not exceed MBLKL(mp) and the range
 * [b_rptr + off, b_rptr + off + len) ends at or before b_wptr.
 */
#define	MBLKIN(mp, off, len) (((off) <= MBLKL(mp)) && \
			(((mp)->b_rptr + (off) + (len)) <= (mp)->b_wptr))

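Now we can modify the read-side put procedure to call upmod_upcase for each mblock seen on input. (Note that in the full example below upmodrput is installed in upmod_winit, the write-side qinit, so it is the data travelling down the write side, i.e. command output, that is actually converted.)

static void
upmodrput(queue_t *q, mblk_t *mp)
{
	upmodput(q, upmod_upcase(mp));
}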

Here is the full example:

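/*
 * This example demonstrates a minimum STREAMS module that honors flow control.
 * It converts all data bytes handled by upmodrput() to upper case. Note that
 * upmodrput() is installed in upmod_winit below, i.e. as the write-side put
 * procedure, despite its name.
 */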

/*
 * Required include files.
 */
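/*
 * NOTE: the header names were lost when this listing was extracted; the
 * list below is reconstructed from the symbols the code uses -- verify.
 */
#include	<sys/types.h>
#include	<sys/errno.h>
#include	<sys/stream.h>
#include	<sys/strsun.h>
#include	<sys/conf.h>
#include	<sys/modctl.h>
#include	<sys/ddi.h>
#include	<sys/sunddi.h>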

/*
 * Function prototypes.
 */
static int	upmodopen(queue_t *, dev_t *, int, int, cred_t *);
static int	upmodclose(queue_t *);

static void	upmodput(queue_t *, mblk_t *);
static void	upmodrput(queue_t *, mblk_t *);
static void	upmodsrv(queue_t *);
static mblk_t	*upmod_upcase(mblk_t *mp);

/*
 * Module linkage data
 */
static struct module_info	upmod_minfo = {
	2,		/* mi_idnum */
	"upmod",	/* mi_idname */
	0,		/* mi_minpsz */
	INFPSZ,		/* mi_maxpsz */
	4096,		/* mi_hiwat */
	512		/* mi_lowat */
};

static struct qinit	upmod_rinit = {
	(int (*)())upmodput,	/* qi_putp */
	(int (*)())upmodsrv,	/* qi_srvp  */
	upmodopen,	/* qi_qopen */
	upmodclose,	/* qi_qclose */
	NULL,		/* qi_qadmin */
	&upmod_minfo,	/* qi_minfo */
};

static struct qinit	upmod_winit = {
	(int (*)())upmodrput,	/* qi_putp */
	(int (*)())upmodsrv,	/* qi_srvp */
	NULL,		/* qi_qopen */
	NULL,		/* qi_qclose */
	NULL,		/* qi_qadmin */
	&upmod_minfo,	/* qi_minfo */
};

static struct streamtab	upmod_info = {
	&upmod_rinit,	/* st_rdinit */
	&upmod_winit,	/* st_wrinit */
};

/*
 * fmodsw entry for the module: its name string, a pointer to the
 * streamtab defined above, and the flag word D_MP | D_MTPERQ.
 * NOTE(review): the flags select the module's multithreading behaviour;
 * see the MT STREAMS chapter of the STREAMS Programming Guide to confirm
 * the exact perimeter semantics.
 */
static struct fmodsw fsw = {
	"upmod",
	&upmod_info,
	D_MP | D_MTPERQ
};

/*
 * Module linkage information for the kernel.
 */
/*
 * mod_strmodops is supplied by the kernel; declare it extern so that this
 * file only references it rather than emitting its own tentative
 * definition of the object.
 */
extern struct mod_ops mod_strmodops;

/* STREAMS-module linkage: ops vector, description string, fmodsw entry. */
static struct modlstrmod modlstrmod = {
	&mod_strmodops, "Example up-through module 1.0", &fsw
};

/* Top-level linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlstrmod, NULL
};

/*
 * Standard module entry points.
 */
/* Module load entry point: hands modlinkage to mod_install(). */
int
_init(void)
{
	int rc;

	rc = mod_install(&modlinkage);
	return (rc);
}

/* Module unload entry point: hands modlinkage to mod_remove(). */
int
_fini(void)
{
	int rc;

	rc = mod_remove(&modlinkage);
	return (rc);
}

/* Module information entry point: forwards modinfop to mod_info(). */
int
_info(struct modinfo *modinfop)
{
	int rc;

	rc = mod_info(&modlinkage, modinfop);
	return (rc);
}


/*
 * Actual module code.
 */

/*
 * STREAMS entry points.
 */

/*
 * Open entry point.
 *
 * Fails with EINVAL unless sflag is MODOPEN. On the first open (q_ptr
 * still NULL) both queues of the pair are tagged with a non-NULL q_ptr
 * and qprocson() is called; a repeated open returns 0 without doing
 * anything. dev, oflag and crp are not used.
 */
/* ARGSUSED */
static int
upmodopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
{
	if (sflag != MODOPEN)
		return (EINVAL);

	if (rq->q_ptr == NULL) {
		void *opened = (void *)1;

		/* Mark both sides as open so a duplicate open is a no-op. */
		WR(rq)->q_ptr = opened;
		rq->q_ptr = opened;

		/*
		 * Once qprocson() returns the module is linked into the
		 * STREAM; its put/service procedures may run at any time.
		 */
		qprocson(rq);
	}

	return (0);
}

/*
 * Close entry point: calls qprocsoff() and then clears the q_ptr marker
 * on both queues of the pair. Always returns 0.
 */
static int
upmodclose(queue_t *rq)
{
	/*
	 * After qprocsoff() the module is disconnected from the STREAM:
	 * it receives no more messages and its put/service procedures
	 * are not running.
	 */
	qprocsoff(rq);

	WR(rq)->q_ptr = NULL;
	rq->q_ptr = NULL;

	return (0);
}

/*
 * Support routines.
 */

/*
 * Put procedure.
 *
 * A message is queued with putq() only when it is not high priority and
 * either something is already queued here or canputnext() reports that
 * the next queue cannot take it. Everything else is passed on with
 * putnext().
 */
static void
upmodput(queue_t *q, mblk_t *mp)
{
	if (queclass(mp) != QPCTL &&
	    (q->q_first != NULL || !canputnext(q))) {
		(void) putq(q, mp);
		return;
	}

	putnext(q, mp);
}


/*
 * Converting put procedure: runs the message through upmod_upcase() and
 * then hands the result to the ordinary put procedure, upmodput().
 */
static void
upmodrput(queue_t *q, mblk_t *mp)
{
	mblk_t *converted;

	converted = upmod_upcase(mp);
	upmodput(q, converted);
}

/*
 * Service procedure used by both the read and the write queue.
 *
 * Drains the queue with getq(), forwarding each message with putnext()
 * for as long as canputnext() allows. The first message that cannot be
 * forwarded is put back with putbq() and the loop stops.
 */
static void
upmodsrv(queue_t *q)
{
	mblk_t *mp;

	for (mp = getq(q); mp != NULL; mp = getq(q)) {
		if (!canputnext(q)) {
			/* Flow controlled: return the message and stop. */
			(void) putbq(q, mp);
			break;
		}
		putnext(q, mp);
	}
}

/*
 * Minimal ASCII-only character helpers, defined only when not already
 * provided as macros.
 *
 * islower(x)/isupper(x) are true when x is in 'a'..'z' / 'A'..'Z'.
 * toupper(x) maps 'a'..'z' to 'A'..'Z' and returns every other value
 * unchanged (digits, punctuation and control characters are left alone).
 *
 * NOTE: these macros evaluate their argument more than once; do not pass
 * expressions with side effects.
 */
#ifndef islower
#define	islower(x)	(((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
#endif

#ifndef isupper
#define	isupper(x)	(((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z'))
#endif

#ifndef toupper
#define	toupper(x)	(islower(x) ? ((x) - 'a' + 'A') : (x))
#endif

/*
 * Upper-case, in place, every ASCII lower-case byte found in the M_DATA
 * blocks of a message.
 *
 * passed_mp - first block of the message; the b_cont chain is followed.
 *
 * Returns passed_mp itself, so the call can be nested inside another call.
 *
 * NOTE(review): the data is rewritten in place and DB_REF() is not
 * consulted -- confirm callers never hand in blocks whose data is shared.
 */
static mblk_t *
upmod_upcase(mblk_t *passed_mp)
{
	mblk_t *blk;

	for (blk = passed_mp; blk != NULL; blk = blk->b_cont) {
		uchar_t *cur;
		uchar_t *end;

		/* Leave non-data blocks and empty blocks untouched. */
		if (DB_TYPE(blk) != M_DATA || MBLKL(blk) <= 0)
			continue;

		end = blk->b_wptr;
		for (cur = blk->b_rptr; cur < end; cur++) {
			if (islower(*cur))
				*cur = toupper(*cur);
		}
	}

	return (passed_mp);
}

Let us save it in file upmod.c, and compile it:


$ /usr/sfw/bin/gcc -c -m64 -o upmod.o -I/usr/include \
    -O -D_KERNEL  -D_SYSCALL32 -D_SYSCALL32_IMPL upmod.c
$ ld -r -o upmod upmod.o

Now we can install it. For example, on sparc system:


$ su
# cp upmod /kernel/strmod/sparcv9
# exit
$ strchg -h upmod
$ UPTIME
  4:53PM  UP 35 DAY(S),  4:38,  2 USERS,  LOAD AVERAGE: 0.01, 0.01, 0.00
USER     TTY           LOGIN@  IDLE   JCPU   PCPU  WHAT
USER    PTS/1         8AUG0510DAYS      8         BASH
USER    PTS/2         4:11PM           12      1  W
$ STRCHG -P
$ w
   4:55pm  up 35 day(s),  4:39,  2 users,  load average: 0.00, 0.01, 0.00
User     tty           login@  idle   JCPU   PCPU  what
user    pts/1         8Aug0510days      8         bash
user    pts/2         4:11pm           12      1  w

The output reminds me of the old Russian-made mainframes ES 1045 (Soviet clone of IBM 360/370 series) which could only output in all caps. Interestingly, at these times Russian-made computers usually used capital letters for English and lower letters for Russian. This was a precursor for KOI8-r encoding.


Technorati Tag:
Technorati Tag:
Technorati Tag:

( Aug 19 2005, 05:02:41 PM PDT ) Permalink Comments [0]

20050727 Wednesday July 27, 2005

STREAMS flow-control implementation STREAMS flow-control implementation

In my previous blog entry I discussed how to write a very simple STREAMS module that participates in the STREAMS flow control. It had two bugs in it - one intentional and one unintentional. Both were spotted by Yu Xiangning in the comments. The unintentional bug was in the setting of the flow control high and low water marks. This blog goes into more detail of the STREAMS flow control and discusses the actual implementation in Solaris.

STREAMS flow-control implementation

STREAMS have a simple flow-control mechanism that is voluntary by design. Participating modules and drivers ask the next queue whether it wishes to accept more messages by calling canputnext(9f) and if the next queue is "full" (it has more data than is specified in the module high-water mark) the module enqueues the data with putq(9f) or putbq(9f). The putq() and putbq() functions place the message on the module's queue and arrange a service procedure to be run some time later. If the service procedure returns without processing all messages on its queue it will not be called again unless it is either enabled explicitly by qenable(9f) or implicitly when the amount of data queued in the next module drops below low-water mark.

All modules participating in the flow control must have a service routine. The flow control operates between the two nearest queues in a stream containing service procedures. Detailed description of the flow control is contained in the Solaris STREAMS Programming Guide. The excellent UNIX System V Network Programming contains a very good description of the flow control in section 9.2:

A stream is said to be flow-controlled when its queues become full. When the number of bytes of data in the message on a queue becomes greater than the queue's high-water mark, the queue is considered full. Flow control is an advisory state where the processing element passing messages to the full queue stops sending messages and places them on its own queue. This way, flow control can propagate from one end of the stream to the other.

At the stream head, when a process tries to write to a stream whose topmost write queue with a service procedure is full, the process goes to sleep until the number of bytes of data stored in the queue reaches the queue's low-water mark. At this point the queue is no longer flow controlled. Note the distinction between the queue being full and being flow-controlled. The queue is only full as long as the amount of data it contains is over its high-water mark, but the queue remains flow controlled after the amount of data falls below the high-water mark. Of course, if the high and low-water marks are set to the same value, then there is no such distinction.

Source: Stephen A. Rago, UNIX System V Network Programming.

What we will do now is look at the actual code that implements all this. I will provide simplified versions of most functions, ignoring locks and priority bands. By all means, look at the actual code to see all the missing parts.

canputnext()

The canputnext() function is pretty simple. It finds the next queue with a service procedure and checks whether it has the QFULL flag set. If QFULL is not set, it returns 1; if it is set, it sets the QWANTW flag and returns 0. The QWANTW flag tells that another queue wants to place messages here, so it should be back-enabled when the QFULL flag is dropped:

int
canputnext(queue_t *q)
{
	/* Next queue forward that has a service procedure. */
	queue_t *nextq = q->q_next->q_nfsrv;

	if (nextq->q_flag & QFULL) {
		/* Full: leave a note that a writer is waiting, refuse. */
		nextq->q_flag |= QWANTW;
		return (0);
	}

	return (1);
}

putq()

The putq() puts messages on a driver's queue. The message is placed after any other messages of the same priority, and flow control parameters are updated. If QNOENB is not set, the service routine is enabled:

/*
 * Put a message on a queue.
 *
 * Messages are enqueued on a priority basis.  The priority classes
 * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
 * and B_NORMAL (type < QPCTL && band == 0).
 *
 * Add appropriate weighted data block sizes to queue count.
 * If queue hits high water mark then set QFULL flag.
 *
 * If QNOENAB is not set (putq is allowed to enable the queue),
 * enable the queue only if the message is PRIORITY,
 * or the QWANTR flag is set (indicating that the service procedure
 * is ready to read the queue.  This implies that a service
 * procedure must NEVER put a high priority message back on its own
 * queue, as this would result in an infinite loop (!).
 */
int
putq(queue_t *q, mblk_t *bp)
{
	mblk_t *blk;
	int	nbytes = 0;
	int	nblks = 0;

	/* The new message always becomes the tail, so it has no successor. */
	bp->b_next = NULL;
	if (q->q_first == NULL) {
		/* Empty queue: the message is the head as well. */
		bp->b_prev = NULL;
		q->q_first = bp;
	} else {
		mblk_t *tail = q->q_last;

		bp->b_prev = tail;
		tail->b_next = bp;
	}
	q->q_last = bp;

	/* Add up the bytes and the number of blocks in the message. */
	blk = bp;
	while (blk != NULL) {
		nbytes += (blk->b_wptr - blk->b_rptr);
		nblks++;
		blk = blk->b_cont;
	}
	q->q_count += nbytes;
	q->q_mblkcnt += nblks;

	/* Either counter reaching the high-water mark marks the queue full. */
	if (q->q_count >= q->q_hiwat || q->q_mblkcnt >= q->q_hiwat)
		q->q_flag |= QFULL;

	/* Enable only when enabling is permitted and QWANTR is set. */
	if (canenable(q) && (q->q_flag & QWANTR))
		qenable(q);

	return (1);
}

getq()

The getq() function fetches and returns the first message from the queue. As a side effect it may trigger back-enabling of the queues which were previously flow controlled because this queue had too much data. The back-enabling process will arrange for the service routine of a previously flow-controlled module to be called. That routine will call getq() on its own queue, which may cause further back-enabling, propagating the release of the flow control backwards down the stream.

/*
 * Get a message off head of queue
 *
 * If queue has no buffers then mark queue
 * with QWANTR. (queue wants to be read by
 * someone when data becomes available)
 *
 * If there is something to take off then do so.
 * If queue falls below hi water mark turn off QFULL
 * flag.  Decrement weighted count of queue.
 * Also turn off QWANTR because queue is being read.
 *
 * The queue count is maintained on a per-band basis.
 * Priority band 0 (normal messages) uses q_count,
 * q_lowat, etc.
 *
 * If queue count is below the lo water mark and QWANTW
 * is set, enable the closest backq which has a service
 * procedure and turn off the QWANTW flag.
 *
 * A note on the use of q_count and q_mblkcnt:
 *   q_count is the traditional byte count for messages that
 *   have been put on a queue.  Documentation tells us that
 *   we shouldn't rely on that count, but some drivers/modules
 *   do.  What was needed, however, is a mechanism to prevent
 *   runaway streams from consuming all of the resources,
 *   and particularly be able to flow control zero-length
 *   messages.  q_mblkcnt is used for this purpose.  It
 *   counts the number of mblk's that are being put on
 *   the queue.  The intention here, is that each mblk should
 *   contain one byte of data and, for the purpose of
 *   flow-control, logically does.  A queue will become
 *   full when EITHER of these values (q_count and q_mblkcnt)
 *   reach the highwater mark.  It will clear when BOTH
 *   of them drop below the highwater mark.  And it will
 *   backenable when BOTH of them drop below the lowwater
 *   mark.
 *   With this algorithm, a driver/module might be able
 *   to find a reasonably accurate q_count, and the
 *   framework can still try and limit resource usage.
 */
mblk_t *
getq(queue_t *q)
{
	/* Take the head message off the queue (NULL if there is none). */
	mblk_t *head = getq_noenab(q);
	/* Band of the removed message; 0 when nothing was removed. */
	int band = (head != NULL) ? head->b_band : 0;

	/* Let qbackenable() decide whether a back queue must be enabled. */
	qbackenable(q, band);

	return (head);
}

getq_noenab()

The getq_noenab() is a STREAMS framework internal function which does the actual job of fetching the message but doesn't deal with back-enabling the STREAM.

/*
 * Like getq() but does not backenable. The caller must call qbackenable()
 * after it is done with accessing the queue.
 *
 * Returns the first message of the queue, unlinked from it, or NULL when
 * the queue is empty (in which case QWANTR is set on the queue).
 */
mblk_t *
getq_noenab(queue_t *q)
{
	mblk_t *bp;
	mblk_t *tmp;
	int	bytecnt = 0, mblkcnt = 0;

	if ((bp = q->q_first) == NULL) {
		/* Nothing queued: note that the queue wants to be read. */
		q->q_flag |= QWANTR;
	} else {
		/* Unlink the head message and fix up the queue ends. */
		if ((q->q_first = bp->b_next) == NULL)
			q->q_last = NULL;
		else
			q->q_first->b_prev = NULL;

		/* Get message byte count for q_count accounting */
		for (tmp = bp; tmp; tmp = tmp->b_cont) {
			bytecnt += (tmp->b_wptr - tmp->b_rptr);
			mblkcnt++;
		}

		q->q_count -= bytecnt;
		q->q_mblkcnt -= mblkcnt;

		/* QFULL clears only when BOTH counters are below hiwat. */
		if ((q->q_count < q->q_hiwat) &&
		    (q->q_mblkcnt < q->q_hiwat)) {
			q->q_flag &= ~QFULL;
		}

		/* The queue is being read, so QWANTR no longer applies. */
		q->q_flag &= ~QWANTR;
		bp->b_next = NULL;
		bp->b_prev = NULL;
	}
	return (bp);
}

qbackenable()

The qbackenable() function is another STREAMS internal function that checks whether queue back-enabling is required and calls the actual function backenable() doing back-enabling.

/*
 * Determine if a backenable is needed after removing a message in the
 * specified band.
 *
 * q    - queue the message was removed from
 * band - band of the removed message (0 for normal messages)
 *
 * For band 0 a backenable happens only when QWANTW is set and either
 * q_lowat is 0 or both q_count and q_mblkcnt are below q_lowat.
 */
void
qbackenable(queue_t *q, int band)
{
	int backenab = 0;

	/* Band 0 and QWANTW not set: nothing to do. */
	if (band == 0 && (q->q_flag & QWANTW) == 0)
		return;

	if (band == 0) {
		/* Both counters must be under the low-water mark (or lowat 0). */
		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
		    q->q_mblkcnt < q->q_lowat)) {
			backenab = q->q_flag & QWANTW;
		}
	} else {
		/* Non-zero bands are left out of this simplified listing. */
		...
	}

	if (backenab & QWANTW) {
		/* Clear the request before enabling the back queue. */
		q->q_flag &= ~QWANTW;
		backenable(q, band);
        }
}

backenable()

The backenable() is a STREAMS internal function that finds the nearest back queue with service procedure and enables it. The enabling involves arranging the service routine to be run sometime in the future. It is handled by the qenable_locked() function that is beyond the scope of this blog.

/*
 * enable first back queue with svc procedure.
 * Use pri == -1 to avoid the setqback
 *
 * q   - queue to start searching backwards from
 * pri - value passed to setqback() for the queue found; -1 skips that call
 */
void
backenable(queue_t *q, int pri)
{
	queue_t	*nq;

	/* find nearest back queue with service proc */
	for (nq = backq(q); nq && !nq->q_qinfo->qi_srvp; nq = backq(nq))
		;

	/* nq is NULL when no back queue has a service procedure. */
	if (nq) {
		if (pri != -1)
			setqback(nq, pri);
		qenable_locked(nq);
	}
}

Conclusion

The main purpose of this blog is to link the theoretical discussion of the STREAMS flow control with the actual code. A few lines in C is sometimes worth many words, but words do help to understand what this C code actually does.
Technorati Tag:
Technorati Tag:
Technorati Tag:
Technorati Tag:

( Jul 27 2005, 05:16:20 PM PDT ) Permalink Comments [10]

20050707 Thursday July 07, 2005

Grown up Do-Nothing STREAMS Module

Grown up Do-Nothing STREAMS Module

In my earlier entry I played with a simple STREAMS module that does nothing useful, but just passes messages back and forth. Now I want to extend this to a respectable STREAMS module fully participating in the STREAMS flow control. This means that in addition to the open/close entry points the module should define read and write put procedures and a service procedure. The previous module was called "nullmod"; this module will be called "passmod".

Let us start with the put procedure. It can be as simple as

/*
 * Simplest possible put procedure: pass every message straight on with
 * putnext(), with no flow-control check at all.
 */
void
passmodput(queue_t *q, mblk_t *mp)
{
	putnext(q, mp);
}

What we now want to do is to check that the next module in the STREAM can accept our message. We do this by calling canputnext(9f) and using putq(9f) if canputnext() fails:

/*
 * Put procedure that checks flow control: forward the message when
 * canputnext() allows it, otherwise keep it on our own queue.
 */
void
passmodput(queue_t *q, mblk_t *mp)
{
	if (canputnext(q)) {
		putnext(q, mp);
	} else {
		/*
		 * Queue with putq(), not putbq(): putq() places the message
		 * after those already queued, so messages arriving while we
		 * are flow controlled keep their order. putbq() is meant for
		 * returning a message just taken off the queue with getq().
		 */
		(void) putq(q, mp);
	}
}

Here is the service routine. It gets all messages one by one and passes them down the STREAM, observing the flow control:

/*
 * Service procedure used by both the read and the write queue.
 *
 * Drains the queue with getq(), forwarding each message with putnext()
 * for as long as canputnext() allows. The first message that cannot be
 * forwarded is put back with putbq() and the loop stops.
 */
static void
passmodsrv(queue_t *q)
{
	mblk_t *mp;

	for (mp = getq(q); mp != NULL; mp = getq(q)) {
		if (!canputnext(q)) {
			/* Flow controlled: return the message and stop. */
			(void) putbq(q, mp);
			break;
		}
		putnext(q, mp);
	}
}

Now, what happens if by the time we enter the put procedure there are already some messages enqueued? We definitely do not want to call putnext() on the new message since it may arrive before the earlier messages and violate the message ordering in the STREAM. To defend against this problem we revise the put procedure a bit:

/*
 * Put procedure that preserves ordering: forward the message only when
 * nothing is queued here already and canputnext() allows it; otherwise
 * add it to our own queue behind the messages that arrived earlier.
 */
void
passmodput(queue_t *q, mblk_t *mp)
{
	if ((q->q_first == NULL) && canputnext(q)) {
		putnext(q, mp);
	} else {
		/*
		 * putq(), not putbq(): the new message must go behind the
		 * ones already queued, otherwise it would be delivered
		 * ahead of them by the service procedure.
		 */
		(void) putq(q, mp);
	}
}

Now if there are any messages already enqueued we will continue enqueueing all new messages. This code is very straightforward, but a bit naive. The complication comes from high-priority messages (which can be passed using the RS_HIPRI flag to the putmsg(2) function). When you call putq() with a high-priority message, the STREAMS framework immediately enables the queue and calls its service procedure, which will cause an infinite loop, so we should be a bit more careful and always pass high-priority messages forward. This means that we don't need to enqueue them in the first place, so we can rewrite the put procedure again to fix the problem:

/*
 * Put procedure, final form.
 *
 * A message is queued with putq() only when it is not high priority and
 * either something is already queued here or canputnext() reports that
 * the next queue cannot take it. High-priority messages, and messages
 * that meet no obstacle, are passed on with putnext().
 */
void
passmodput(queue_t *q, mblk_t *mp)
{
	if (queclass(mp) != QPCTL &&
	    (q->q_first != NULL || !canputnext(q))) {
		(void) putq(q, mp);
		return;
	}

	putnext(q, mp);
}

Now we have all the components to construct a fully-functioning STREAMS module which correctly implements flow control. The full code is below.

NOTE: The code below contains a subtle bug. Try to find it before I explain the bug in the next blog entry.

/*
 * This example demonstrates a minimum STREAMS module that honors flow control.
 */

/*
 * Required include files.
 */
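/*
 * NOTE: the header names were lost when this listing was extracted; the
 * list below is reconstructed from the symbols the code uses -- verify.
 */
#include	<sys/types.h>
#include	<sys/errno.h>
#include	<sys/stream.h>
#include	<sys/conf.h>
#include	<sys/modctl.h>
#include	<sys/ddi.h>
#include	<sys/sunddi.h>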

/*
 * Function prototypes.
 */
static int	passmodopen(queue_t *, dev_t *, int, int, cred_t *);
static int	passmodclose(queue_t *);
static void	passmodput(queue_t *, mblk_t *);
static void	passmodsrv(queue_t *);

/*
 * Module linkage data
 */
/*
 * Module identification and queue limits; both qinit tables below point
 * at this one structure.
 *
 * The water marks must be non-zero for a module that queues messages:
 * with a high-water mark of 0 the queue counts can never drop below it,
 * so once QFULL has been set by putq() it would never be cleared again.
 */
static struct module_info	passmod_minfo = {
	2,		/* mi_idnum */
	"passmod",	/* mi_idname */
	0,		/* mi_minpsz */
	INFPSZ,		/* mi_maxpsz */
	4096,		/* mi_hiwat */
	512		/* mi_lowat */
};

static struct qinit	passmod_rinit = {
	(int (*)())passmodput,	/* qi_putp */
	(int (*)())passmodsrv,	/* qi_srvp  */
	passmodopen,	/* qi_qopen */
	passmodclose,	/* qi_qclose */
	NULL,		/* qi_qadmin */
	&passmod_minfo,	/* qi_minfo */
};

static struct qinit	passmod_winit = {
	(int (*)())passmodput,	/* qi_putp */
	(int (*)())passmodsrv,	/* qi_srvp */
	NULL,		/* qi_qopen */
	NULL,		/* qi_qclose */
	NULL,		/* qi_qadmin */
	&passmod_minfo,	/* qi_minfo */
};

static struct streamtab	passmod_info = {
	&passmod_rinit,	/* st_rdinit */
	&passmod_winit,	/* st_wrinit */
};

/*
 * fmodsw entry for the module: its name string, a pointer to the
 * streamtab defined above, and the flag word D_MP.
 */
static struct fmodsw fsw = {
	"passmod",
	&passmod_info,
	D_MP
};

/*
 * Module linkage information for the kernel.
 */
/*
 * mod_strmodops is supplied by the kernel; declare it extern so that this
 * file only references it rather than emitting its own tentative
 * definition of the object.
 */
extern struct mod_ops mod_strmodops;

/* STREAMS-module linkage: ops vector, description string, fmodsw entry. */
static struct modlstrmod modlstrmod = {
	&mod_strmodops, "Example pass-through module 1.0", &fsw
};

/* Top-level linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlstrmod, NULL
};

/*
 * Standard module entry points.
 */
/* Module load entry point: hands modlinkage to mod_install(). */
int
_init(void)
{
	int rc;

	rc = mod_install(&modlinkage);
	return (rc);
}

/* Module unload entry point: hands modlinkage to mod_remove(). */
int
_fini(void)
{
	int rc;

	rc = mod_remove(&modlinkage);
	return (rc);
}

/* Module information entry point: forwards modinfop to mod_info(). */
int
_info(struct modinfo *modinfop)
{
	int rc;

	rc = mod_info(&modlinkage, modinfop);
	return (rc);
}


/*
 * Actual module code.
 */

/*
 * STREAMS entry points.
 */

/*
 * Open entry point.
 *
 * Fails with EINVAL unless sflag is MODOPEN. On the first open (q_ptr
 * still NULL) both queues of the pair are tagged with a non-NULL q_ptr
 * and qprocson() is called; a repeated open returns 0 without doing
 * anything. dev, oflag and crp are not used.
 */
/* ARGSUSED */
static int
passmodopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
{
	if (sflag != MODOPEN)
		return (EINVAL);

	if (rq->q_ptr == NULL) {
		void *opened = (void *)1;

		/* Mark both sides as open so a duplicate open is a no-op. */
		WR(rq)->q_ptr = opened;
		rq->q_ptr = opened;

		/*
		 * Once qprocson() returns the module is linked into the
		 * STREAM; its put/service procedures may run at any time.
		 */
		qprocson(rq);
	}

	return (0);
}

/*
 * Close entry point: calls qprocsoff() and then clears the q_ptr marker
 * on both queues of the pair. Always returns 0.
 */
static int
passmodclose(queue_t *rq)
{
	/*
	 * After qprocsoff() the module is disconnected from the STREAM:
	 * it receives no more messages and its put/service procedures
	 * are not running.
	 */
	qprocsoff(rq);

	WR(rq)->q_ptr = NULL;
	rq->q_ptr = NULL;

	return (0);
}

/*
 * Support routines.
 */

/*
 * Put procedure used by both the read and the write queue.
 *
 * A message is queued with putq() only when it is not high priority and
 * either something is already queued here or canputnext() reports that
 * the next queue cannot take it. High-priority messages, and messages
 * that meet no obstacle, are passed on with putnext().
 */
static void
passmodput(queue_t *q, mblk_t *mp)
{
	if (queclass(mp) != QPCTL &&
	    (q->q_first != NULL || !canputnext(q))) {
		(void) putq(q, mp);
		return;
	}

	putnext(q, mp);
}

/*
 * Service procedure used by both the read and the write queue.
 *
 * Drains the queue with getq(), forwarding each message with putnext()
 * for as long as canputnext() allows. The first message that cannot be
 * forwarded is put back with putbq() and the loop stops.
 */
static void
passmodsrv(queue_t *q)
{
	mblk_t *mp;

	for (mp = getq(q); mp != NULL; mp = getq(q)) {
		if (!canputnext(q)) {
			/* Flow controlled: return the message and stop. */
			(void) putbq(q, mp);
			break;
		}
		putnext(q, mp);
	}
}


Technorati Tag:
Technorati Tag:
Technorati Tag:

( Jul 07 2005, 06:30:42 PM PDT ) Permalink Comments [14]

20050615 Wednesday June 15, 2005

MDB for STREAMS

OpenSolaris includes a very powerful and modular debugger, MDB, that is an invaluable tool for analyzing crash dumps and live systems. It comes with a comprehensive manual which is a great read. Here is some brief information for brave souls having to debug STREAMS-related issues.

Queues

The most useful MDB dcmd for most module and driver developers is ::queue. Typing ::help queue will show the full usage information:


NAME
  queue - filter and display STREAM queue

SYNOPSIS
  addr ::queue [-q|v] [-m mod] [-f flag] [-F flag] [-s syncq_addr]

DESCRIPTION
  
  Print queue information for a given queue pointer.
  
  Without the address of a "queue_t" structure given, print information about
  all queues in the "queue_cache".
  
  Options:
        -v:             be verbose - print symbolic flags falues
        -q:             be quiet - print queue pointer only
        -f flag:        print only queues with flag set
        -F flag:        print only queues with flag NOT set
        -m modname:     print only queues with specified module name
        -s syncq_addr:  print only queues which use specified syncq
  
  Available conversions:
        q2rdq:          given a queue addr print read queue pointer
        q2wrq:          given a queue addr print write queue pointer
        q2otherq:       given a queue addr print other queue pointer
        q2syncq:        given a queue addr print syncq pointer (::help syncq)
        q2stream:       given a queue addr print its stream pointer
                (see ::help stream and ::help stdata)
  
  To walk q_next pointer of the queue use
        queue_addr::walk qnext

ATTRIBUTES

  Target: kvm
  Module: genunix
  Interface Stability: Unstable

If you just type ::queue, MDB will print information about all opened queue instances 1 in the system. For example, on my desktop:


> ::queue
    ADDR MODULE         FLAGS NBLK
d0353008 fifostrrhead  044032    0 00000000
d0353180 ip            204032    0 00000000
d03532f8 consms        202032    0 00000000
d0353470 strrhead      044032    0 00000000
d03535e8 tcp           204032    0 00000000
d0353760 strrhead      044032    0 00000000
d03538d8 conskbd       20c032    0 00000000
d0353a50 wc            242032    0 00000000
d0353bc8 ip            204032    0 00000000
d0353d40 strrhead      044032    0 00000000
d033f010 usbms         002032    0 00000000
d033f188 hid           201032    0 00000000
d033f300 consms        242032    0 00000000
d033f478 rts           108832    0 00000000
 ...

What you see in the output is the actual read queue address for each open module or instance, the module name, queue flags, number of messages on the module queue and the pointer to the first message in the queue. You can translate hex flag values into symbolic names adding -v option:


> ::queue -v
    ADDR MODULE         FLAGS NBLK
d0353008 fifostrrhead  044032    0 00000000
                       |
                       +-->  QWANTR       Someone wants to read Q
                             QREADR       This is the reader (first) Q
                             QUSE         This queue in use (allocation)
                             QMTSAFE      stream module is MT-safe
                             QEND         last queue in stream
d0353180 ip            204032    0 00000000
                       |
                       +-->  QWANTR       Someone wants to read Q
                             QREADR       This is the reader (first) Q
                             QUSE         This queue in use (allocation)
                             QMTSAFE      stream module is MT-safe
                             QISDRV       the Queue is attached to a driver
 ...

You can ask about specific queue by providing its address for the ::queue command:


> d0353180::queue -v                  
    ADDR MODULE         FLAGS NBLK
d0353180 ip            204032    0 00000000
                       |
                       +-->  QWANTR       Someone wants to read Q
                             QREADR       This is the reader (first) Q
                             QUSE         This queue in use (allocation)
                             QMTSAFE      stream module is MT-safe
                             QISDRV       the Queue is attached to a driver

What if you want to find all open instances of a specific module? This is easy with the -m flag. For example, to find all open instances of IP you can type


> ::queue -m ip
d0353180
d0353bc8
d197e5f8
d2263020
  ...

Notice that when you use any of the filtering flags, the ::queue dcmd prints only the addresses of matching queues. You can still get more detailed information by pipelining the output to the ::queue dcmd:


> ::queue -m ip | ::queue
    ADDR MODULE         FLAGS NBLK
d0353180 ip            204032    0 00000000
d0353bc8 ip            204032    0 00000000
d197e5f8 ip            244032    0 00000000
d2263020 ip            204032    0 00000000
  ...

You can use filtering options to find all queues with specific flag value set:


> ::queue -f QISDRV|::queue
    ADDR MODULE         FLAGS NBLK
d0353180 ip            204032    0 00000000
d03532f8 consms        202032    0 00000000
d03535e8 tcp           204032    0 00000000
d03538d8 conskbd       20c032    0 00000000
d0353a50 wc            242032    0 00000000
  ...

prints information about all driver queues. What if you want information about modules and not drivers? Easy:


> ::queue -F QISDRV|::queue
    ADDR MODULE         FLAGS NBLK
d0353008 fifostrrhead  044032    0 00000000
d0353470 strrhead      044032    0 00000000
d0353760 strrhead      044032    0 00000000
d0353d40 strrhead      044032    0 00000000
d033f010 usbms         002032    0 00000000
d033f478 rts           108832    0 00000000
  ...

Similarly you can find all queues which are flow controlled. Here is an example from a real core dump:


> ::queue -f QFULL|::queue
            ADDR MODULE         FLAGS NBLK
ffffffff83e9d000 strrhead      04403c   64 ffffffff953c0600
ffffffff82548018 strrhead      04403c  331 ffffffff949c2680
ffffffff85f4ba48 timod         00083c    7 ffffffffbfa04140
ffffffff83acf2e8 timod         00083c    3 ffffffff84659f00

Careful reader will notice that this will print information only about flow controlled read side queues. How about write side queues? Here comes a useful little ::q2wrq dcmd:


> ::queue -q | ::q2wrq | ::queue -f QFULL|::queue -v
            ADDR MODULE         FLAGS NBLK
ffffffff83cb0388 tl            24402c  205 fffffe80e3017040
                               |
                               +-->  QWANTW       Someone wants to write Q
                                     QFULL        Q is considered full
                                     QUSE         This queue in use (allocation)
                                     QMTSAFE      stream module is MT-safe
                                     QEND         last queue in stream
                                     QISDRV       the Queue is attached to a driver

The ::q2wrq dcmd simply maps the read queue pointer to the write queue pointer. The ::q2rdq dcmd performs the opposite mapping, and the ::q2otherq dcmd maps a read queue pointer to the write queue pointer and vice versa.

Stream Heads

Once you are comfortable dealing with individual queues you may wonder about the state of a whole open STREAM. The simplest way to find a stream containing a given queue is to apply a ::q2stream dcmd which maps a queue pointer to the stream head pointer. You can use a nice ::stream dcmd to display the whole stream graphically:


> ffffffff83acf2e8::q2stream|::stream

+-----------------------+-----------------------+
| 0xffffffff82548110    | 0xffffffff82548018    | 
| strwhead              | strrhead              | 
|                       |                       |
| cnt = 0t0             | cnt = 0t57420         | 
| flg = 0x00004022      | flg = 0x0004403c      | 
+-----------------------+-----------------------+
            |                       ^
            v                       |
+-----------------------+-----------------------+
| 0xffffffff83acf3e0    | 0xffffffff83acf2e8    | 
| timod                 | timod                 | 
|                       |                       |
| cnt = 0t0             | cnt = 0t528           | 
| flg = 0x00000822      | flg = 0x0000083c      | 
+-----------------------+-----------------------+
            |                       ^
            v                       |
+-----------------------+-----------------------+
| 0xffffffff83acf670    | 0xffffffff83acf578    | 
| udp                   | udp                   | 
|                       |                       |
| cnt = 0t0             | cnt = 0t0             | 
| flg = 0x00000822      | flg = 0x00000832      | 
+-----------------------+-----------------------+
            |                       ^
            v                       |
+-----------------------+-----------------------+
| 0xffffffff82a86128    | 0xffffffff82a86030    | 
| ip                    | ip                    | 
|                       |                       |
| cnt = 0t0             | cnt = 0t0             | 
| flg = 0x00244022      | flg = 0x00204032      | 
+-----------------------+-----------------------+

You can see all read and write queues of the stream and their state. The read side stream head is not very happy - it has 57420 bytes hanging around and blocking another 528 bytes in timod.

Another way to look at the stream is to walk the read or write side using the qnext walker:


> 0xffffffff82548110::walk qnext|::queue 
            ADDR MODULE         FLAGS NBLK
ffffffff82548110 strwhead      004022    0 0000000000000000
ffffffff83acf3e0 timod         000822    0 0000000000000000
ffffffff83acf670 udp           000822    0 0000000000000000
ffffffff82a86128 ip            244022    0 0000000000000000

> 0xffffffff82a86030::walk qnext|::queue 
            ADDR MODULE         FLAGS NBLK
ffffffff82a86030 ip            204032    0 0000000000000000
ffffffff83acf578 udp           000832    0 0000000000000000
ffffffff83acf2e8 timod         00083c    3 ffffffff84659f00
ffffffff82548018 strrhead      04403c  331 ffffffff949c2680

A careful reader will notice that the ::stream dcmd displays how many bytes are in the queue while ::queue displays how many messages are there. Do we want to know what these messages are? Here comes the next useful dcmd.

Messages

Let me introduce a new player - the ::mblk dcmd. Typing ::help mblk will show you all the gory details:


NAME
  mblk - print an mblk

SYNOPSIS
  addr ::mblk [-q|v] [-f|F flag] [-t|T type] [-l|L|B len] [-d dbaddr]

DESCRIPTION
  
  Print mblock information for a given mblk pointer.
  Without the address, print information about all mblocks.
  
  Fields printed:
        ADDR:   mblk address
        FL:     Flags
        TYPE:   Type of corresponding dblock
        LEN:    Data length as b_wptr - b_rptr
        BLEN:   Dblock space as db_lim - db_base
        RPTR:   Read pointer
        DBLK:   Dblock pointer
  
  Options:
        -v:             be verbose - print symbolic flags falues
        -q:             be quiet - print mblk pointer only
        -d dbaddr:      print mblks with specified dblk address
        -f flag:        print only mblks with flag set
        -F flag:        print only mblks with flag NOT set
        -t type:        print only mblks of specified db_type
        -T type:        print only mblks other then the specified db_type
        -l len:         tprint only mblks with MBLKL == len
        -L len:         print only mblks with MBLKL <= len 
        -G len:         print only mblks with MBLKL >= len 
        -b len:         print only mblks with db_lim - db_base == len
  

ATTRIBUTES

  Target: kvm
  Module: genunix
  Interface Stability: Unstable

It is easy to look at a specific message:


> ffffffff949c2680::mblk
            ADDR FL TYPE    LEN   BLEN              RPTR             DBLK
ffffffff949c2680 0  proto   56    80    ffffffff88f145b0 ffffffff88f14540

This output shows us that this is an M_PROTO message with 56 bytes of information starting at address ffffffff88f145b0 (see footnote 2). We know that messages like to hang out together. How do we print the whole b_cont chain? Like this:


> ffffffff949c2680::walk b_cont|::mblk
            ADDR FL TYPE    LEN   BLEN              RPTR             DBLK
ffffffff949c2680 0  proto   56    80    ffffffff88f145b0 ffffffff88f14540
ffffffff943b5240 0  data    108   144   ffffffff9868878c ffffffff98688700

Now we can see both the M_PROTO and the attached M_DATA messages, and we can see that UDP is sending 108 bytes of data upstream. Similarly, the b_next walker will follow the b_next chain:


> ffffffff84659f00::walk b_next|::mblk
            ADDR FL TYPE    LEN   BLEN              RPTR             DBLK
ffffffff84659f00 0  proto   56    80    ffffffff94257c70 ffffffff94257c00
fffffe80e3078340 0  proto   56    80    ffffffff94257bb0 ffffffff94257b40
ffffffff84658a40 0  proto   56    80    ffffffff942575b0 ffffffff94257540

And, of course, we can combine both in the pipeline:


> ffffffff84659f00::walk b_next|::walk b_cont|::mblk
            ADDR FL TYPE    LEN   BLEN              RPTR             DBLK
ffffffff84659f00 0  proto   56    80    ffffffff94257c70 ffffffff94257c00
ffffffff84659300 0  data    120   208   ffffffff82fc144c ffffffff82fc13c0
fffffe80e3078340 0  proto   56    80    ffffffff94257bb0 ffffffff94257b40
ffffffff846599c0 0  data    120   208   ffffffff8465120c ffffffff84651180
ffffffff84658a40 0  proto   56    80    ffffffff942575b0 ffffffff94257540
ffffffff84658b00 0  data    120   208   ffffffff84651d4c ffffffff84651cc0

If you are really curious, you can see all allocated messages by simply typing ::mblk:


> ::mblk
    ADDR FL TYPE    LEN   BLEN      RPTR     DBLK
d6d0e000 0  proto   24    64    cfedaf00 cfedaec0
d6d0e020 0  data    0     8192  d377d400 d23d7c00
d6d0e040 0  data    3     64    dc14d580 dc14d540
d6d0e060 0  proto   156   320   cfedbd00 cfedbcc0
d6d0e080 0  data    0     64    d366a9c4 d366a980
d6d0e0a0 0  data    80    320   cfedbe80 cfedbe40
d6d0e0c0 0  proto   24    64    cfe5e0c0 cfe5e080
d6d0e0e0 0  data    115   320   cfe490e7 cfe49080

As with the ::queue command you can filter by the message type. For example, you can look at M_DATA messages only (see footnote 3):


> ::mblk -t M_DATA|::mblk
    ADDR FL TYPE    LEN   BLEN      RPTR     DBLK
d6d0e020 0  data    0     8192  d377d400 d23d7c00
d6d0e040 0  data    16    64    dc14d580 dc14d540
d6d0e080 0  data    0     64    d366a9c4 d366a980
d6d0e0a0 0  data    80    320   cfedbe80 cfedbe40
d6d0e0e0 0  data    115   320   cfe490e7 cfe49080
d6d0e140 0  data    0     64    d77c7480 d77c7440

Or you can find all 0-bytes messages:


> ::mblk -l 0|::mblk
    ADDR FL TYPE    LEN   BLEN      RPTR     DBLK
d6d0e020 0  data    0     8192  d377d400 d23d7c00
d6d0e040 0  data    0     64    dc1ab714 dc1ab6c0
d6d0e080 0  data    0     64    d366a9c4 d366a980
d6d0e140 0  data    0     64    d77c7480 d77c7440
...

In my opinion these examples show why MDB is really cool for kernel debugging. It can present a huge amount of information in an informative way. Want to know how many 1-byte messages are floating around your system?


> ::mblk -l 0t1 ! wc -l 
      51

Conclusions

We flexed just a small muscle of the MDB and we already can go a long way in debugging complicated STREAMS issues in the kernel. We can study individual queues, STREAMS, messages, or get a bird's eye view and look at the picture from some distance. Whenever you encounter a difficult debugging problem MDB is your friend (if you know how to talk to it politely).


Footnotes:

1The ::queue command will print all read queues.

2The UDP module talks a protocol called TPI, so the M_PROTO message is one of the TPI primitives. The first part of the primitive is always its ID, we can get this by typing


> ffffffff88f145b0/D                      
0xffffffff88f145b0:             20              

Looking at usr/src/uts/common/sys/tihdr.h we see that this is the T_UNITDATA_IND message (makes sense). We can print the whole message now:


> ffffffff88f145b0::print 'struct T_unitdata_ind'
{
    PRIM_type = 0x14
    SRC_length = 0x10
    SRC_offset = 0x14
    OPT_length = 0x14
    OPT_offset = 0x24
}

3 Underground hackers special:


> ::mblk -t M_DATA|::print mblk_t b_rptr|/s
Jun 15 09:57:16 lpr[13390]: [ID 575460 FACILITY_AND_PRIORITY] net_response(18)
NOTICE: NFS4 FACT SHEET: 
 Action: NR_CLIENTID 
 NFS4 error: NFS4ERR_STALE_CLIENTID 
 Suspected server reboot.   
Jun 15 09:57:16 lpr[13390]: [ID 626751 FACILITY_AND_PRIORITY] net_write(18, 0x8045478, 12)
NOTICE: [NFS4][Server: onnv.sfbay][Mntpt: /ws/onnv-gate/usr]NFS server onnv.sfbay not responding; still trying
·gp0Jun 15 09:57:16 lpr[13390]: [ID 458697 FACILITY_AND_PRIORITY] net_open(bulka.SFBay, 5)
NOTICE: [NFS4][Server: onnv.sfbay][Mntpt: /ws/onnv-gate/usr]NFS server onnv.sfbay ok
...

Prints the content of all the data messages. I can see eyes flashing in the darkness!


Technorati Tag:
Technorati Tag:
Technorati Tag:
Technorati Tag:

( Jun 15 2005, 12:29:08 PM PDT ) Permalink Comments [1]

20050527 Friday May 27, 2005

Playing with STREAMS module from the User-Land

Playing with STREAMS module from the User-Land

In my previous STREAMS blog entry, we discussed how to construct a do-nothing STREAMS module. Now we will come back to user-land and see how we can play with STREAMS modules. We will learn how to

  • List all modules in the stream;
  • Push a new module;
  • Pop a module;

Show me the STREAM

Suppose that you have an open file descriptor fd and would like to know what STREAMS modules and drivers live behind the scene in the kernel in the STREAM representing the file.

The article by Rajesh Ramchandani on the Sun Developer Network provides an excellent example with the full source code of the printmod() function. It uses the I_LIST ioctl, which returns a list of modules in the struct str_mlist structure.

Please add my module

The next thing we are going to try is pushing our new module onto the STREAM. The following simple function should do the trick:

/*
 * Push the STREAMS module named modname onto the stream behind fd.
 * Returns the ioctl() result; when that is negative, the failure is
 * reported on stderr before returning it.
 */
int pushmod(int fd, char *modname)
{
	int rc = ioctl(fd, I_PUSH, modname);

	if (rc >= 0)
		return rc;

	perror("I_PUSH");
	fprintf(stderr, "pushmod(%d, %s) failed\n", fd, modname);
	return rc;
}

The function takes a file descriptor and the module name and pushes the module on top of the stream. It returns 0 on success and -1 on failure. We can extend it a bit to push a whole list of modules. Suppose that the module list is a string with commas separating module names:

/*
 * Push a list of modules separated by commas.
 *
 * The names in s are pushed left to right with pushmod(); processing stops
 * at the first failure. A single trailing comma is tolerated. Returns the
 * result of the last pushmod() call that was made.
 *
 * The string is modified temporarily (each comma is overwritten with a NUL
 * while the name before it is pushed), so s must be writable. Every comma
 * is put back before returning, on the failure path as well, so the caller
 * always gets its string back intact.
 */
int pushlist(int fd, char *s)
{
	char *comma;
	int rc;

	for (;;) {
		comma = strchr(s, ',');
		if (comma == NULL)
			return (pushmod(fd, s));

		/* Isolate the current name, push it, then undo the edit. */
		*comma = '\0';
		rc = pushmod(fd, s);
		*comma = ',';

		/* Stop on failure or when nothing follows the comma. */
		if (rc < 0 || comma[1] == '\0')
			return (rc);

		s = comma + 1;
	}
}

The following example demonstrates how it can be used in practice:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stropts.h>
#include <strings.h>

/*
 * Push the comma-separated module list given as the first argument onto
 * the stream behind standard input.
 *
 * Exit status: 0 when there is nothing to do or every push succeeded,
 * 1 when a push failed.
 */
int main(int argc, char *argv[])
{
	/* No module list given: nothing to push. */
	if (argc == 1)
		return (0);

	if (pushlist(0, argv[1]) < 0)
		return (1);

	return (0);
}

We can save this program as pushmod.c and try it (assuming that you have installed the nullmod module from the previous example):


 $ cc pushmod.c -o pushmod
 $ strconf
 ttcompat
 ldterm
 ptem
 pts
 $ ./pushmod nullmod,nullmod,nullmod
 nullmod
 nullmod
 nullmod
 ttcompat
 ldterm
 ptem
 pts

I don't want to see you any more!

Finally, you may remove the module from the top of the stream using a simple call


  if ((rc = ioctl(fd1, I_POP, 0)) < 0) {
  	perror("I_POP");

Conclusions.

Now you know how to manipulate the content of the STREAM. You may want to play with it a bit and see what happens if you insert and remove some interesting modules.

NOTE: It is quite likely that your terminal window will become unusable as a result of your experiments. Many modules assume a certain context and are designed to play in concert with others, so your terminal may misbehave if it is incorrectly configured.


Technorati Tag: OpenSolaris
Technorati Tag: Solaris

( May 27 2005, 07:53:08 PM PDT ) Permalink Comments [0]

20050513 Friday May 13, 2005

STREAMS Hello World

Ever wondered, what would it take to write a minimum STREAMS module that can be correctly installed onto Solaris system? Here is an example of such module (which we will call nullmod) with some explanations.

Every STREAMS module should define the following entry points:

o Module open routine;

o Module close routine;

o Module read side put procedure;

o Module write side put procedure;

o Module read side service procedure (optional);

o Module write side service procedure (optional);

Since our module will only pass messages back and forth it will use putnext(9f) as both write and read side put procedure and will not have any service procedures. So we need to define only open/close functions which we will call nullmodopen() and nullmodclose().

  /*
   * Module open routine.
   * Flags the queue pair as "opened" and links the module into
   * the STREAM. Returns EINVAL for anything but a module open,
   * 0 otherwise.
   */
  static int
  nullmodopen(queue_t *rq, dev_t *dev,
        int oflag, int sflag, cred_t *crp)
  {
        /* Anything other than a module open is rejected. */
        if (sflag != MODOPEN)
                return (EINVAL);

        /*
         * The q_ptr is reserved for module private use; a non-NULL
         * value means this instance is already open, so only the
         * first open does any work.
         */
        if (rq->q_ptr == NULL) {
                queue_t *wq = WR(rq);

                /* Mark both queues of the pair as "opened" */
                wq->q_ptr = (void *)1;
                rq->q_ptr = wq->q_ptr;

                /*
                 * Link the module into the STREAM. From here on it
                 * can send/receive messages and its put/service
                 * procedures may execute at any time.
                 */
                qprocson(rq);
        }

        return (0);
  }

  /*
   * Module close routine.
   * Takes the module out of the STREAM and clears the "opened"
   * marker on both queues. Always returns 0.
   */
  static int
  nullmodclose(queue_t *rq)
  {
        queue_t *wq;

        /*
         * Disconnect the module from the STREAM. Afterwards it can no
         * longer receive messages and its put or service procedures
         * are not running.
         */
        qprocsoff(rq);

        wq = WR(rq);
        wq->q_ptr = NULL;
        rq->q_ptr = NULL;

        return (0);
  }

This is pretty much the only code we need to write. The rest is the glue code that should be present in any module. Here is the description of this glue code.

Every source file should start with comments, so we shall start the nullmod.c with the appropriate comment:

  /*
   * Nullmod: the minimal functioning STREAMS module.
   *
   * Copyright ..... (place your favorite one here).
   *
   */

We will need certain system include files:

  /*
   * Required include files.
   */
  #include      <sys/types.h>
  #include      <sys/conf.h>
  #include      <sys/cred.h>
  #include      <sys/ddi.h>
  #include      <sys/modctl.h>

As we discussed before, our module will define only two functions - nullmodopen() and nullmodclose():

  /*
   * Function prototypes.
   */
  static int    nullmodopen(queue_t *, dev_t *, int, int, cred_t *);
  static int    nullmodclose(queue_t *);

Every STREAMS kernel module should have a corresponding module_info(9S) structure:

  /*
   * Module information: identifies the module by number and name and
   * sets its packet-size limits and flow-control water marks. The same
   * structure is referenced by both the read and write qinit structures.
   */
  static struct module_info     nullmod_minfo = {
        1,              /* mi_idnum */
        "nullmod",      /* mi_idname */
        0,              /* mi_minpsz */
        INFPSZ,         /* mi_maxpsz */
        0,              /* mi_hiwat */
        0               /* mi_lowat */
  };

Also it should have qinit(9S) structures for both the read and write sides:

  /*
   * Read-side queue initialization. putnext itself serves as the put
   * procedure (hence the cast), there is no service procedure, and the
   * module's open/close routines are registered here.
   */
  static struct qinit nullmod_rinit = {
        (int (*)())putnext,     /* qi_putp */
        NULL,           /* qi_srvp  */
        nullmodopen,    /* qi_qopen */
        nullmodclose,   /* qi_qclose */
        NULL,           /* qi_qadmin */
        &nullmod_minfo, /* qi_minfo */
  };

  /*
   * Write-side queue initialization. As on the read side, putnext serves
   * as the put procedure and there is no service procedure; the open and
   * close slots are left NULL on this side.
   */
  static struct qinit nullmod_winit = {
        (int (*)())putnext,     /* qi_putp */
        NULL,           /* qi_srvp */
        NULL,           /* qi_qopen */
        NULL,           /* qi_qclose */
        NULL,           /* qi_qadmin */
        &nullmod_minfo, /* qi_minfo */
  };

The streamtab(9S) structure links both together:

  /*
   * Stream table: pairs the read-side and write-side qinit structures.
   */
  static struct streamtab nullmod_info = {
        &nullmod_rinit, /* st_rdinit - read-side queue initialization */
        &nullmod_winit, /* st_wrinit - write-side queue initialization */
  };

The fmodsw(9S) structure describes the module to the operating system, providing the pointer to the streamtab structure above:

  /*
   * Module switch entry: gives the module its name and points at the
   * stream table describing its queues.
   */
  static struct fmodsw fsw = {
        "nullmod",      /* module name */
        &nullmod_info,  /* streams information */
        D_MP            /* module flags - multithreaded module */
  };

Now we need to provide the linkage information for the module:

  /*
   * Module linkage information for the kernel.
   *
   * modlstrmod bundles the ops vector, a human-readable description
   * string and the fmodsw entry; modlinkage wraps it and is the object
   * passed to mod_install(), mod_remove() and mod_info().
   */
  /*
   * NOTE(review): declared here without an initializer; presumably this
   * resolves to the kernel's STREAMS-module ops vector at load time -
   * confirm.
   */
  struct mod_ops mod_strmodops;

  static struct modlstrmod modlstrmod = {
        &mod_strmodops, "Example pass-through module 1.0", &fsw
  };

  static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlstrmod, NULL
  };

Every loadable kernel module should also provide _init, _fini and _info entry points.

_init(9E) initializes a loadable module. It is called before any other routine in a loadable module. Most modules do not require any specific initialization and can just call mod_install(9F). _fini(9E) prepares a loadable module for unloading. It is called when the system wants to unload a module. In most cases it can just call mod_remove(9F). _info(9E) returns information about a loadable module. In most cases _info(9E) just returns the value returned by mod_info(9F).

  /*
   * Standard module entry points.
   */
  /* Load-time entry point: install the module and pass the result back. */
  int _init(void)
  {
        int rc;

        rc = mod_install(&modlinkage);
        return (rc);
  }

  /* Unload-time entry point: remove the module and pass the result back. */
  int _fini(void)
  {
        int rc;

        rc = mod_remove(&modlinkage);
        return (rc);
  }

  /*
   * Information entry point: let mod_info() fill in *modinfop from the
   * module's linkage structure and return its result.
   *
   * The return type and the name must be separate tokens; written as
   * "int_info" this would define a differently named function and leave
   * the module without an _info entry point.
   */
  int _info(struct modinfo *modinfop)
  {
        return (mod_info(&modlinkage, modinfop));
  }

Now we need to put our definitions for nullmodopen() and nullmodclose() above and we are done! Our ``do-nothing'' module is ready.

Let us save the module in the file nullmod.c. To compile it we need a C compiler which can generate code for the native kernel mode - 32-bit for x86 and 64-bit for SPARC or AMD64 platforms. If we are using Sun compilers we can use the following command to produce the module object file on SPARC:

  cc -c  -D_KERNEL -D_SYSCALL32 -D_SYSCALL32_IMPL -xarch=v9 nullmod.c

Or with gcc

  gcc -c -D_KERNEL -D_SYSCALL32 -D_SYSCALL32_IMPL -m64 nullmod.c

After that we need to use ld with -r option to produce the final binary:

  ld -r -o nullmod nullmod.o

Now we need to copy our module to /usr/kernel/strmod/sparcv9 (you need to be a super-user for this):

  # cp nullmod /usr/kernel/strmod/sparcv9

!!WARNING!!: you are going to install the kernel module. It is possible that the system will panic if there is some problem in the module. Please make sure that no one else is using the system and you will not upset anyone (including yourself) if the system panics as a result of your experiments. End of WARNING.

Now we can load the module:

  # modload nullmod

And verify that it is loaded:

  # modinfo | grep nullmod
  147 7bbbde30 320 - 1 nullmod (Example pass-through module 1.0)

We can also insert our module onto STDIN for our shell:

  # strchg -h nullmod

and verify that it is present:

  # strconf 
  nullmod
  ttcompat
  ldterm
  ptem
  pts

When we are done, we can remove the module from our shell STDIN:

  # strchg -p
  # strconf 
  ttcompat
  ldterm
  ptem
  pts

You can also unload the module (it is usually not required as Solaris unloads unused modules itself):

  # modunload -i 147
  # modinfo|grep nullmod
  #

Now you know how to

- Write a trivial Solaris STREAMS kernel module
- Compile the module
- Install the module on your system
- Load and unload the module

Why might this module be of any use? I can think of several reasons:

It provides a good template for more complicated modules

You can use it to estimate performance impact of inserting a module in a STREAM. This is the fastest module possible, so any additional code will only slow things down.

You can demonstrate your Solaris kernel skills during a job interview.

Not bad for a simple do-nothing program!

( May 13 2005, 09:06:15 PM PDT ) Permalink Comments [3]

Calendar

RSS Feeds

Search

Links

Navigation

Referers