This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Enhancements to block IO & IO scheduler tapset


Hi,

Here is a patch to add tracepoint-based probes to block IO and IO scheduler tapsets.
I had to add new probe aliases instead of adding fallbacks to existing probes because the tracepoint that flags an event (say, elv_add_request) is not defined at function entry but somewhere in the interior of the function. An equivalent kprobe-based probe mapping to the same point would therefore need to be a statement probe, which I didn't think was a scalable design choice.


Also, the handlers for a lot of probe aliases are duplicated -- they essentially expose the same set of local variables available in the probe. Is there some way I could reduce the duplication here? I tried to define multiple probe aliases that share the same probe definition:

Example,
probe ioscheduler_trace.plug = kernel.trace("block_plug"),
probe ioscheduler_trace.unplug_io = kernel.trace("block_unplug_io")
{
..do something..
}

But the systemtap translator doesn't seem to support that at the moment. Would this be a good-to-have language enhancement? Or would a common initializing function, called from each probe alias, be more useful here?

Looking forward to feedback...

--
Prerna Saxena

Linux Technology Centre,
IBM Systems and Technology Lab,
Bangalore, India
Index: stap-git-Oct-01/tapset/ioscheduler.stp
===================================================================
--- stap-git-Oct-01.orig/tapset/ioscheduler.stp
+++ stap-git-Oct-01/tapset/ioscheduler.stp
@@ -142,6 +142,174 @@ probe ioscheduler.elv_completed_request
 %)
 }
 
+/**
+ * probe ioscheduler_trace.elv_add_request - Fires on the block_rq_insert
+ *	tracepoint, i.e. when a request is added to the request queue.
+ * @elevator_name	: Name of the I/O elevator type attached to the queue.
+ * @rq			: Address of the request.
+ * @rq_flags		: Request flags (cmd_flags), or 0 if the request is NULL.
+ * @disk_major		: Disk major number of the request.
+ * @disk_minor		: First minor number of the request's disk.
+ *	disk_major/disk_minor are -1 when the request or its rq_disk is NULL.
+ */
+probe ioscheduler_trace.elv_add_request
+			= kernel.trace("block_rq_insert")
+{
+elevator_name = kernel_string($q->elevator->elevator_type->elevator_name)
+rq = $rq
+
+if ($rq == 0 || $rq->rq_disk ==0) {
+	disk_major = -1
+	disk_minor = -1
+} else {
+		disk_major = $rq->rq_disk->major
+		disk_minor = $rq->rq_disk->first_minor
+}
+
+rq_flags = $rq==0? 0:$rq->cmd_flags
+}
+
+/**
+ * probe ioscheduler_trace.elv_completed_request - Fires on the
+ *	block_rq_complete tracepoint, i.e. when a request is completed.
+ * @elevator_name	: Name of the I/O elevator type attached to the queue.
+ * @rq			: Address of the request.
+ * @rq_flags		: Request flags (cmd_flags), or 0 if the request is NULL.
+ * @disk_major		: Disk major number of the request.
+ * @disk_minor		: First minor number of the request's disk.
+ *	disk_major/disk_minor are -1 when the request or its rq_disk is NULL.
+ */
+probe ioscheduler_trace.elv_completed_request
+			= kernel.trace("block_rq_complete")
+{
+elevator_name = kernel_string($q->elevator->elevator_type->elevator_name)
+rq = $rq
+
+if ($rq == 0 || $rq->rq_disk ==0) {
+	disk_major = -1
+	disk_minor = -1
+} else {
+		disk_major = $rq->rq_disk->major
+		disk_minor = $rq->rq_disk->first_minor
+}
+
+rq_flags = $rq==0? 0:$rq->cmd_flags
+}
+
+/**
+ * probe ioscheduler_trace.elv_issue_request - Fires on the block_rq_issue
+ *	tracepoint, i.e. when a request is scheduled.
+ * @elevator_name	: Name of the I/O elevator type attached to the queue.
+ * @rq			: Address of the request.
+ * @rq_flags		: Request flags (cmd_flags), or 0 if the request is NULL.
+ * @disk_major		: Disk major number of the request.
+ * @disk_minor		: First minor number of the request's disk.
+ *	disk_major/disk_minor are -1 when the request or its rq_disk is NULL.
+ */
+probe ioscheduler_trace.elv_issue_request
+			= kernel.trace("block_rq_issue")
+{
+elevator_name = kernel_string($q->elevator->elevator_type->elevator_name)
+rq = $rq
+
+if ($rq == 0 || $rq->rq_disk ==0) {
+	disk_major = -1
+	disk_minor = -1
+} else {
+		disk_major = $rq->rq_disk->major
+		disk_minor = $rq->rq_disk->first_minor
+}
+
+rq_flags = $rq==0? 0:$rq->cmd_flags
+}
+
+/**
+ * probe ioscheduler_trace.elv_requeue_request - Fires when a request is
+ *	put back on the queue, when the hardware cannot accept more requests.
+ * @elevator_name	: Name of the I/O elevator type attached to the queue.
+ * @rq			: Address of the request.
+ * @rq_flags		: Request flags (cmd_flags), or 0 if the request is NULL.
+ * @disk_major		: Disk major number of the request.
+ * @disk_minor		: First minor number of the request's disk.
+ *	disk_major/disk_minor are -1 when the request or its rq_disk is NULL.
+ */
+probe ioscheduler_trace.elv_requeue_request
+			= kernel.trace("block_rq_requeue")
+{
+elevator_name = kernel_string($q->elevator->elevator_type->elevator_name)
+rq = $rq
+
+if ($rq == 0 || $rq->rq_disk ==0) {
+	disk_major = -1
+	disk_minor = -1
+} else {
+		disk_major = $rq->rq_disk->major
+		disk_minor = $rq->rq_disk->first_minor
+}
+
+rq_flags = $rq==0? 0:$rq->cmd_flags
+}
+
+/**
+ * probe ioscheduler_trace.elv_abort_request - Fires when a request is aborted.
+ * @elevator_name	: Name of the I/O elevator type attached to the queue.
+ * @rq			: Address of the request.
+ * @rq_flags		: Request flags (cmd_flags), or 0 if the request is NULL.
+ * @disk_major		: Disk major number of the request.
+ * @disk_minor		: First minor number of the request's disk.
+ *	disk_major/disk_minor are -1 when the request or its rq_disk is NULL.
+ */
+probe ioscheduler_trace.elv_abort_request
+			= kernel.trace("block_rq_abort")
+{
+elevator_name = kernel_string($q->elevator->elevator_type->elevator_name)
+rq = $rq
+
+if ($rq == 0 || $rq->rq_disk ==0) {
+	disk_major = -1
+	disk_minor = -1
+} else {
+		disk_major = $rq->rq_disk->major
+		disk_minor = $rq->rq_disk->first_minor
+}
+
+rq_flags = $rq==0? 0:$rq->cmd_flags
+}
+
+/**
+ * probe ioscheduler_trace.plug - Fires when a request queue is plugged,
+ *	i.e. requests in the queue cannot be serviced by the block driver.
+ * @rq_queue	: Address of the request queue ($q of the block_plug tracepoint).
+ *
+ */
+probe ioscheduler_trace.plug = kernel.trace("block_plug")
+{
+  rq_queue = $q
+}
+
+/**
+ * probe ioscheduler_trace.unplug_io - Fires when a request queue is unplugged:
+ *	either when the number of pending requests in the queue exceeds the
+ *	threshold, or when the timer started at plug time expires.
+ * @rq_queue	: Address of the request queue ($q of block_unplug_io).
+ *
+ */
+probe ioscheduler_trace.unplug_io = kernel.trace("block_unplug_io")
+{
+  rq_queue = $q
+}
+
+/**
+ * probe ioscheduler_trace.unplug_timer - Fires when the unplug timer
+ *	associated with a request queue expires.
+ * @rq_queue	: Address of the request queue ($q of block_unplug_timer).
+ *
+ */
+probe ioscheduler_trace.unplug_timer = kernel.trace("block_unplug_timer")
+{
+  rq_queue = $q
+}
+
 function disk_major_from_request:long(var_q:long)
 %{ /* pure */
 	struct request_queue *q = (struct request_queue *)((long)THIS->var_q);
Index: stap-git-Oct-01/tapset/ioblock.stp
===================================================================
--- stap-git-Oct-01.orig/tapset/ioblock.stp
+++ stap-git-Oct-01/tapset/ioblock.stp
@@ -186,3 +186,156 @@ probe ioblock.end = kernel.function("bio
 %)
         size = $bio->bi_size
 }
+
+/**
+ * probe ioblock_trace.bounce
+ *
+ *	Fires whenever a buffer bounce is needed for at least one page of
+ *	a block IO request (block_bio_bounce tracepoint).
+ *
+ * Context :
+ *	The process creating a block IO request.
+ *
+ * Variables :
+ *  $bio      - struct bio *
+ *  $q        - struct request_queue *
+ *  devname   - device for which a buffer bounce was needed
+ *  ino       - i-node number of the mapped file
+ *  bytes_done - number of bytes transferred (read from bio->bi_size)
+ *  sector    - beginning sector for the entire bio
+ *  flags     - see below
+ *      BIO_UPTODATE    0       ok after I/O completion
+ *      BIO_RW_BLOCK    1       RW_AHEAD set, and read/write would block
+ *      BIO_EOF         2       out-of-bounds error
+ *      BIO_SEG_VALID   3       nr_hw_seg valid
+ *      BIO_CLONED      4       doesn't own data
+ *      BIO_BOUNCED     5       bio is a bounce bio
+ *      BIO_USER_MAPPED 6       contains user pages
+ *      BIO_EOPNOTSUPP  7       not supported
+ *  (no error variable is set by this probe)
+ *  rw        - binary trace for read/write request
+ *  vcnt      - bio vector count which represents number of array elements (page,
+ *              offset, length) which make up this I/O request
+ *  idx       - offset into the bio vector array
+ *  phys_segments - number of segments in this bio after physical address
+ *                  coalescing is performed.
+ *  size      - total size in bytes (same bio->bi_size value as bytes_done)
+ */
+probe ioblock_trace.bounce = kernel.trace("block_bio_bounce")
+{
+        devname = __bio_devname($bio)
+        ino = __bio_ino($bio)
+
+        bytes_done = $bio->bi_size
+        sector = $bio->bi_sector
+        flags = $bio->bi_flags
+        rw = $bio->bi_rw
+        vcnt = $bio->bi_vcnt
+        idx = $bio->bi_idx
+        phys_segments = $bio->bi_phys_segments
+        size = $bio->bi_size
+}
+
+/* probe ioblock_trace.request
+ *
+ *  Fires just as a generic block I/O request is created for a bio
+ *  (block_bio_queue tracepoint).
+ * Context:
+ *  The process makes block I/O request
+ *
+ * Variables:
+ *  $bio       struct bio* for which IO request is to be submitted
+ *  $q         struct request_queue* to which the request is to be added
+ *  devname   - block device name
+ *  ino       - i-node number of the mapped file
+ *  sector    - beginning sector for the entire bio
+ *  flags     - see below
+ *	BIO_UPTODATE    0       ok after I/O completion
+ *	BIO_RW_BLOCK    1       RW_AHEAD set, and read/write would block
+ *	BIO_EOF         2       out-of-bounds error
+ *	BIO_SEG_VALID   3       nr_hw_seg valid
+ *	BIO_CLONED      4       doesn't own data
+ *	BIO_BOUNCED     5       bio is a bounce bio
+ *	BIO_USER_MAPPED 6       contains user pages
+ *	BIO_EOPNOTSUPP  7       not supported
+ *
+ *  rw        - binary trace for read/write request
+ *  vcnt      - bio vector count which represents number of array elements (page,
+ *              offset, length) which make up this I/O request
+ *  idx       - offset into the bio vector array
+ *  phys_segments - number of segments in this bio after physical address
+ *                  coalescing is performed.
+ *  size      - total size in bytes (bytes_done carries the same bi_size value)
+ *  bdev      - target block device
+ *  bdev_contains - points to the device object which contains the
+ *                  partition (when bio structure represents a partition)
+ *  p_start_sect -  points to the start sector of the partition
+ *                  structure of the device
+ */
+
+probe ioblock_trace.request = kernel.trace("block_bio_queue")
+{
+        devname = __bio_devname($bio)
+        ino = __bio_ino($bio)
+
+        bytes_done = $bio->bi_size
+        /* no "error" here: block_bio_queue passes only (q, bio), so $error cannot resolve */
+        sector = $bio->bi_sector
+        flags = $bio->bi_flags
+        rw = $bio->bi_rw
+        vcnt = $bio->bi_vcnt
+        idx = $bio->bi_idx
+        phys_segments = $bio->bi_phys_segments
+        size = $bio->bi_size
+        bdev_contains = $bio->bi_bdev->bd_contains
+        bdev = $bio->bi_bdev
+        p_start_sect = __bio_start_sect($bio)
+}
+
+/* probe ioblock_trace.end
+ *
+ *  Fires whenever a block I/O transfer is complete (probes bio_endio as a kernel function, not a tracepoint).
+ *
+ * Context:
+ *  The process signals the transfer is done.
+ *
+ * Variables:
+ *  devname   - block device name
+ *  ino       - i-node number of the mapped file
+ *  bytes_done - number of bytes transferred (read from bio->bi_size)
+ *  sector    - beginning sector for the entire bio
+ *  flags     - see below
+ *      BIO_UPTODATE    0       ok after I/O completion
+ *      BIO_RW_BLOCK    1       RW_AHEAD set, and read/write would block
+ *      BIO_EOF         2       out-of-bounds error
+ *      BIO_SEG_VALID   3       nr_hw_seg valid
+ *      BIO_CLONED      4       doesn't own data
+ *      BIO_BOUNCED     5       bio is a bounce bio
+ *      BIO_USER_MAPPED 6       contains user pages
+ *      BIO_EOPNOTSUPP  7       not supported
+ *
+ *  error     - 0 on success (the $error argument of bio_endio)
+ *  rw        - binary trace for read/write request
+ *  vcnt      - bio vector count which represents number of array elements (page,
+ *              offset, length) which make up this I/O request
+ *  idx       - offset into the bio vector array
+ *  phys_segments - number of segments in this bio after physical address
+ *                  coalescing is performed.
+ *  size      - total size in bytes (same bio->bi_size value as bytes_done)
+ */
+probe ioblock_trace.end = kernel.function("bio_endio")
+{
+        devname = __bio_devname($bio)
+        ino = __bio_ino($bio)
+
+        bytes_done = $bio->bi_size
+        error = $error
+
+        sector = $bio->bi_sector
+        flags = $bio->bi_flags
+        rw = $bio->bi_rw
+        vcnt = $bio->bi_vcnt
+        idx = $bio->bi_idx
+        phys_segments = $bio->bi_phys_segments
+        size = $bio->bi_size
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]