kernel 3.10内核源码分析--块设备层request plug/unplug机制 3_用户5259788255

http://blog.sina.com.cn/u/5259788255

首页博文目录关于我

个人资料

微博

加好友发纸条

写留言加关注

博客等级：
博客积分：

博客访问：
关注人气：
获赠金笔：0支
赠出金笔：0支
荣誉徽章：

正文字体大小：大中小

kernel 3.10内核源码分析--块设备层request plug/unplug机制 3

(2015-01-30 16:25:11)

2) kblockd工作队列的工作内容
kblockd工作队列的工作内容由blk_delay_work()函数实现，主要就是调用__blk_run_queue对请求队列进行unplug。

点击(此处)折叠或打开

/*
 * Work handler for a request queue's delay_work: runs the queue while
 * holding its queue_lock.
 */
static void blk_delay_work(struct work_struct *work)
{
	/* Recover the request queue that embeds this delayed work item. */
	struct request_queue *queue =
		container_of(work, struct request_queue, delay_work.work);

	spin_lock_irq(queue->queue_lock);
	/* Run the queue under the lock (presumably ends up in the queue's request_fn). */
	__blk_run_queue(queue);
	spin_unlock_irq(queue->queue_lock);
}

2、unplug机制
内核中设计了两种unplug机制：
1）调度时进行unplug(异步方式)
当发生内核调度时，当前进程sleep前，先将当前task的plug列表中的请求flush到派发队列中，并进行unplug。
主要代码流程如下：
schedule->
    sched_submit_work ->
        blk_schedule_flush_plug()->
            blk_flush_plug_list(plug, true) ->注意:这里传入的from_schedule参数为true，表示将触发异步unplug，即唤醒kblockd工作队列来进行unplug操作。后续的kblockd的唤醒周期在块设备驱动中设置，比如scsi中设置为3ms。
                queue_unplugged->
                    blk_run_queue_async

queue_unplugged():

点击(此处)折叠或打开

/*
 * Unplug a request queue: kick processing of the requests queued on it,
 * either directly or by deferring to blk_run_queue_async(), then drop
 * q->queue_lock (the caller must hold it on entry, see __releases).
 *
 * @q:             request queue to run
 * @depth:         passed through to the block_unplug trace event
 * @from_schedule: true  -> defer the run via blk_run_queue_async()
 *                 false -> run the queue synchronously right here
 */
static void queue_unplugged(struct request_queue *q, unsigned int depth,
			    bool from_schedule)
	__releases(q->queue_lock)
{
	trace_block_unplug(q, depth, !from_schedule);

	if (!from_schedule) {
		/* Synchronous unplug: run the queue in the caller's context. */
		__blk_run_queue(q);
	} else {
		/* Asynchronous unplug: hand the run over to blk_run_queue_async(). */
		blk_run_queue_async(q);
	}

	spin_unlock(q->queue_lock);
}

blk_run_queue_async():

点击(此处)折叠或打开

/*
 * Asynchronous unplug: instead of running the queue in the caller's
 * context, (re)arm q->delay_work on kblockd_workqueue with a delay of 0,
 * i.e. to run as soon as possible. Nothing is scheduled when the queue is
 * stopped or dead.
 */
void blk_run_queue_async(struct request_queue *q)
{
	if (unlikely(blk_queue_stopped(q) || blk_queue_dead(q)))
		return;

	/* The work item is presumably serviced by blk_delay_work() -- confirm. */
	mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}

scsi_request_fn()://scsi块设备驱动的request_fn()接口，其中当scsi命令下发失败时，会重设kblockd，延迟unplug请求队列。

点击(此处)折叠或打开

/*
 * Excerpt of the SCSI request_fn ("..." marks code elided by the article).
 * Shown here: the dispatch step and the out_delay path that re-arms a
 * delayed queue run after a failed dispatch.
 */
static void scsi_request_fn(struct request_queue *q)
{
...
	/*
	 * Dispatch the command to the low-level driver.
	 */
	/* A non-zero return is treated as a dispatch failure. */
	rtn = scsi_dispatch_cmd(cmd);
	spin_lock_irq(q->queue_lock);
	/* Dispatch failed: take the delayed-retry path. */
	if (rtn)
		goto out_delay;
...
out_delay:
	if (sdev->device_busy == 0)
		/*
		 * Schedule a later run of the queue: blk_delay_queue() takes
		 * its delay in milliseconds; SCSI_QUEUE_DELAY is 3 ms
		 * according to the surrounding article.
		 */
		blk_delay_queue(q, SCSI_QUEUE_DELAY);
blk_delay_queue():
/*
 * Queue q->delay_work on kblockd_workqueue so that it fires after the given
 * delay.
 *
 * @q:     request queue whose delay_work is scheduled
 * @msecs: delay in milliseconds (converted to jiffies here)
 *
 * Does nothing when the queue is dead.
 */
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
	unsigned long delay_jiffies;

	if (unlikely(blk_queue_dead(q)))
		return;

	delay_jiffies = msecs_to_jiffies(msecs);
	queue_delayed_work(kblockd_workqueue, &q->delay_work, delay_jiffies);
}

2）提交IO请求时(make_request)进行unplug
提交IO请求时(make_request)，请求先被链入当前task的plug列表；当该列表中的请求数达到上限(>=BLK_MAX_REQUEST_COUNT)时，会先将其flush到相应设备的请求队列(request_queue)中。
主要代码流程为：
submit_bio->
    generic_make_request->
        make_request->
            blk_queue_bio->
                blk_flush_plug_list(plug, false) ->注意:这里传入的from_schedule参数为false，表示将触发同步unplug，即当即下发请求。
                    queue_unplugged->
                        __blk_run_queue（from_schedule为false时，queue_unplugged直接调用它，不经过blk_run_queue_async）

普通块设备的make_request接口在3.10内核版本中被设置为blk_queue_bio，相应代码分析如下：

点击(此处)折叠或打开

/*
 * Queue a bio on request queue q: first try to merge it into an existing
 * request; failing that, build a new request for it and either park that
 * request on the current task's plug list (current->plug) or, when the task
 * has no plug, add it to the queue and run the queue right away.
 */
void blk_queue_bio(struct request_queue *q, struct bio *bio)
{
const bool sync = !!(bio->bi_rw & REQ_SYNC);
struct blk_plug *plug;
int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
struct request *req;
unsigned int request_count = 0;
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
* ISA dma in theory)
*/
/* Bounce-buffer handling; bio is passed by address, so it may be replaced. */
blk_queue_bounce(q, &bio);
/* Integrity preparation failed: complete the bio with -EIO and stop. */
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
bio_endio(bio, -EIO);
return;
}
/*
* FLUSH/FUA bios skip the merge attempts: take the queue lock and go
* straight to request allocation, inserting with ELEVATOR_INSERT_FLUSH.
*/
if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
spin_lock_irq(q->queue_lock);
where = ELEVATOR_INSERT_FLUSH;
goto get_rq;
}
/*
* Check if we can merge with the plugged list before grabbing
* any locks.
*/
/* Merged into a plugged request: nothing more to do. request_count is filled in by this call. */
if (blk_attempt_plug_merge(q, bio, &request_count))
return;
spin_lock_irq(q->queue_lock);
el_ret = elv_merge(q, &req, bio);
/* Back merge */
if (el_ret == ELEVATOR_BACK_MERGE) {
if (bio_attempt_back_merge(q, req, bio)) {
elv_bio_merged(q, req, bio);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
}
/* Front merge */
} else if (el_ret == ELEVATOR_FRONT_MERGE) {
if (bio_attempt_front_merge(q, req, bio)) {
elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
}
}
/* No merge was possible: a new request is needed for this bio. */
get_rq:
/*
* This sync check and mask will be re-done in init_request_from_bio(),
* but we need to set it earlier to expose the sync flag to the
* rq allocator and io schedulers.
*/
rw_flags = bio_data_dir(bio);
/* Carry the bio's REQ_SYNC bit over into rw_flags (see the comment above for why). */
if (sync)
rw_flags |= REQ_SYNC;
/*
* Grab a free request. This is might sleep but can not fail.
* Returns with the queue unlocked.
*/
/*
* Allocate a request from the queue.
* NOTE(review): the failure path below jumps to out_unlock, which drops
* queue_lock, so get_request() presumably returns NULL with the lock
* still held -- confirm against get_request().
*/
req = get_request(q, rw_flags, bio, GFP_NOIO);
if (unlikely(!req)) {
bio_endio(bio, -ENODEV); /* @q is dead */
goto out_unlock;
}
/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
* We don't worry about that case for efficiency. It won't happen
* often, and the elevators are able to handle it.
*/
/* Initialize the new request from the bio. */
init_request_from_bio(req, bio);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
req->cpu = raw_smp_processor_id();
plug = current->plug;
/* With a plug, park the request on the plug list; without one, insert it and run the queue directly. */
if (plug) {
/*
* If this is the first request added after a plug, fire
* of a plug trace. If others have been added before, check
* if we have multiple devices in this plug. If so, make a
* note to sort the list before dispatch.
*/
if (list_empty(&plug->list))
trace_block_plug(q);
else {/* request_count (presumably the number of requests already plugged) has reached the limit: flush the plug list synchronously first */
if (request_count >= BLK_MAX_REQUEST_COUNT) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
}
}
/* Append the request to the plug list; the flush above keeps the list from growing past BLK_MAX_REQUEST_COUNT. */
list_add_tail(&req->queuelist, &plug->list);
blk_account_io_start(req, true);
} else {
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
/* No plug in effect: run the queue now (presumably ends up in the queue's request_fn). */
__blk_run_queue(q);
/* Also the exit for the goto out_unlock paths above, which jump into this branch just to drop queue_lock. */
out_unlock:
spin_unlock_irq(q->queue_lock);
}
}

本文章有上海计算机培训上海电脑培训机构推荐阅读

阅读┊ 收藏 ┊ 喜欢 ▼ ┊打印┊举报/Report

前一篇：kernel 3.10内核源码分析--块设备层request plug/unplug机制 2

后一篇：修改及查看mysql数据库的字符集

新浪BLOG意见反馈留言板　欢迎批评指正