kernel 3.10内核源码分析--块设备层request plug/unplug机制 3
(2015-01-30 16:25:11)2)
kblockd工作队列的工作内容
kblockd工作队列的工作内容由blk_delay_work()函数实现,主要就是调用__blk_run_queue对请求队列进行unplug。
2、unplug机制
内核中设计了两种unplug机制:
1)调度时进行unplug(异步方式)
当发生内核调度时,当前进程sleep前,先将当前task的plug列表中的请求flush到派发队列中,并进行unplug。
主要代码流程如下:
schedule->
sched_submit_work
->
blk_schedule_flush_plug()->
blk_flush_plug_list(plug,
true)
->注意:这里传入的from_schedule参数为true,表示将触发异步unplug,即唤醒kblockd工作队列来进行unplug操作。后续的kblockd的唤醒周期在块设备驱动中设置,比如scsi中设置为3ms。
queue_unplugged->
blk_run_queue_async
queue_unplugged():
blk_run_queue_async():
scsi_request_fn()://scsi块设备驱动的request_fn()接口,其中当scsi命令下发失败时,会重设kblockd,延迟unplug请求队列。
2)提交IO请求时(make_request)进行unplug
提交IO请求时(make_request),先将请求链入当前task的plug列表,当该列表中的请求数达到上限时(>=BLK_MAX_REQUEST_COUNT),会先flush到相应设备的请求队列中(request_queue)。
主要代码流程为:
submit_bio->
generic_make_request->
make_request->
blk_queue_bio->
blk_flush_plug_list(plug,
false) ->注意:这里传入的from_schedule参数为false,表示将触发同步unplug,即当即下发请求。
queue_unplugged->
__blk_run_queue(注意:from_schedule为false时,queue_unplugged直接调用__blk_run_queue,不经过blk_run_queue_async)
普通块设备的make_request接口在3.10内核版本中被设置为blk_queue_bio,相应代码分析如下:
本文章有上海计算机培训 上海电脑培训 机构推荐阅读
kblockd工作队列的工作内容由blk_delay_work()函数实现,主要就是调用__blk_run_queue对请求队列进行unplug。
点击(此处)折叠或打开
-
/*IO请求队列的delay_work,用于在kblockd中异步unplug请求队列*/
-
static void blk_delay_work(struct work_struct
*work)
-
{
-
struct request_queue
*q;
-
/*获取delay_work所在的请求队列*/
-
q
= container_of(work, struct request_queue, delay_work.work);
-
spin_lock_irq(q->queue_lock);
-
/*直接run
queue,最终调用request_fn对队列中的请求逐一处理*/
-
__blk_run_queue(q);
-
spin_unlock_irq(q->queue_lock);
- }
2、unplug机制
内核中设计了两种unplug机制:
1)调度时进行unplug(异步方式)
当发生内核调度时,当前进程sleep前,先将当前task的plug列表中的请求flush到派发队列中,并进行unplug。
主要代码流程如下:
schedule->
点击(此处)折叠或打开
-
/*unplug请求队列,plug相当于蓄水,将请求放入池子(请求队列)中,unplug相当于放水,即开始调用请求队列的request_fn(scsi_request_fn)来处理请求队列中的请求,将请求提交到scsi层(块设备驱动层)*/
-
static void queue_unplugged(struct request_queue
*q, unsigned int depth,
-
bool from_schedule) -
__releases(q->queue_lock)
-
{
-
trace_block_unplug(q,
depth, !from_schedule);
-
/*调用块设备驱动层提供的request_fn接口处理请求队列中的请求,分异步和同步两种情况。*/
-
if
(from_schedule)
-
/*异步unplug,即通过kblockd工作队列来处理,该工作队列定期唤醒(5s),通过这种方式可以控制流量,提高吞吐量*/
-
blk_run_queue_async(q);
-
else
-
/*同步unplug,即直接调用设备驱动层提供的request_fn接口处理请求队列中的请求*/
-
__blk_run_queue(q);
-
spin_unlock(q->queue_lock);
- }
blk_run_queue_async():
点击(此处)折叠或打开
-
/*异步unplug,即通过kblockd工作队列来处理,该工作队列定期唤醒(5s),通过这种方式可以控制流量,提高吞吐量*/
-
void blk_run_queue_async(struct request_queue
*q)
-
{
-
if
(likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
-
/*唤醒kblockd相关的工作队列,进行unplug处理,注意:这里的delay传入0表示立刻唤醒,kblockd对应的处理接口为:blk_delay_work*/
-
mod_delayed_work(kblockd_workqueue,
&q->delay_work, 0);
- }
scsi_request_fn()://scsi块设备驱动的request_fn()接口,其中当scsi命令下发失败时,会重设kblockd,延迟unplug请求队列。
点击(此处)折叠或打开
-
static void scsi_request_fn(struct
request_queue
*q) -
{
-
...
-
/*
-
*
Dispatch the command to the low-level driver. -
*/
-
/*将scsi命令下发到底层驱动,当返回非0时,表示命令下发失败,则当前的请求队列需要被plug*/
-
rtn
= scsi_dispatch_cmd(cmd);
-
spin_lock_irq(q->queue_lock);
-
/*命令下发失败,需要plug请求队列*/
-
if
(rtn)
-
goto out_delay
-
...
-
out_delay:
-
if
(sdev->device_busy == 0)
-
/*命令下发失败,需要延迟处理,需plug请求队列,设置3ms定时启动kblockd工作队列,进行请求队列的unplug*/
-
blk_delay_queue(q,
SCSI_QUEUE_DELAY);
-
blk_delay_queue
-
/*在指定msecs时间后启动kblockd工作队列*/
-
void blk_delay_queue(struct request_queue
*q, unsigned long msecs) -
{
-
if
(likely(!blk_queue_dead(q)))
-
queue_delayed_work(kblockd_workqueue,
&q->delay_work,
-
msecs_to_jiffies(msecs)); - }
2)提交IO请求时(make_request)进行unplug
提交IO请求时(make_request),先将请求链入当前task的plug列表,当该列表中的请求数达到上限时(>=BLK_MAX_REQUEST_COUNT),会先flush到相应设备的请求队列中(request_queue)。
主要代码流程为:
submit_bio->
普通块设备的make_request接口在3.10内核版本中被设置为blk_queue_bio,相应代码分析如下:
点击(此处)折叠或打开
-
/*在submit_bio中被调用,用于合并bio,并提交请求(request),请求提交到per
task的plug list中*/
-
void blk_queue_bio(struct request_queue
*q, struct bio *bio)
-
{
-
const bool sync = !!(bio->bi_rw & REQ_SYNC);
-
struct blk_plug *plug;
-
int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
-
struct request *req;
-
unsigned int request_count = 0;
-
-
/*
-
* low level driver can indicate that it wants pages above a -
* certain limit bounced to low memory (ie for highmem, or even
-
* ISA dma in theory)
-
*/
-
/*bounce buffer(回弹缓冲区)使用*/ -
blk_queue_bounce(q, &bio);
-
-
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-
bio_endio(bio, -EIO);
-
return;
-
}
-
-
if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
-
spin_lock_irq(q->queue_lock);
-
where = ELEVATOR_INSERT_FLUSH;
-
goto get_rq; -
}
-
-
/*
-
* Check if we can merge with the plugged list before grabbing -
* any locks. -
*/
-
/*尝试将bio合并到request中*/
-
if (blk_attempt_plug_merge(q, bio, &request_count))
-
return;
-
-
spin_lock_irq(q->queue_lock);
-
-
el_ret = elv_merge(q, &req, bio);
-
/*向后合并*/
-
if (el_ret == ELEVATOR_BACK_MERGE) {
-
if (bio_attempt_back_merge(q, req, bio)) {
-
elv_bio_merged(q, req, bio);
-
if (!attempt_back_merge(q, req))
-
elv_merged_request(q, req, el_ret);
-
goto out_unlock; -
}
-
/*向前合并*/
-
} else if (el_ret == ELEVATOR_FRONT_MERGE) {
-
if (bio_attempt_front_merge(q, req, bio)) {
-
elv_bio_merged(q, req, bio);
-
if (!attempt_front_merge(q, req))
-
elv_merged_request(q, req, el_ret);
-
goto out_unlock; -
}
-
}
-
/*不能合并,需要新建request来处理bio*/
-
get_rq:
-
/*
-
* This sync check and mask will be re-done in init_request_from_bio(),
-
* but we need to set it earlier to expose the sync flag to the
-
* rq allocator and io schedulers. -
*/
-
rw_flags = bio_data_dir(bio);
-
/*判断是否需要sync,即直接将IO请求unplug(提交到块设备驱动层),不用等待kblockd来定期plug*/
-
if (sync)
-
rw_flags |= REQ_SYNC;
-
-
/*
-
* Grab a free request. This is might sleep but can not fail.
-
* Returns with the queue unlocked. -
*/
-
/*从请求队列中取一个request*/
-
req = get_request(q, rw_flags, bio, GFP_NOIO);
-
if (unlikely(!req)) {
-
bio_endio(bio, -ENODEV); /* @q is dead */
-
goto out_unlock; -
}
-
-
/*
-
* After dropping the lock and possibly sleeping here, our request -
* may now be mergeable after it had proven unmergeable (above).
-
* We don't worry about that case for efficiency. It won't happen -
* often, and the elevators are able to handle it. -
*/
-
/*将bio加入新的request中*/
-
init_request_from_bio(req, bio);
-
-
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
-
req->cpu = raw_smp_processor_id();
-
-
plug = current->plug;
-
/*如果有plug,则将请求加入到plug的list中,如果没有则直接调用__blk_run_queue提交请求*/
-
if (plug) {
-
/*
-
* If this is the first request added after a plug, fire
-
* of a plug trace. If others have been added before, check
-
* if we have multiple devices in this plug. If so, make a -
* note to sort the list before dispatch. -
*/
-
if (list_empty(&plug->list))
-
trace_block_plug(q);
-
else {/*如果请求队列中的请求数超过了限值,则先unplug?*/
-
if (request_count >= BLK_MAX_REQUEST_COUNT) {
-
blk_flush_plug_list(plug, false);
-
trace_block_plug(q);
-
}
-
}
-
/*把请求加入到plug的list中,当plug的list满了后(>BLK_MAX_REQUEST_COUNT),会flush到相应设备的请求队列中(request_queue)*/
-
list_add_tail(&req->queuelist, &plug->list);
-
blk_account_io_start(req, true);
-
} else {
-
spin_lock_irq(q->queue_lock);
-
add_acct_request(q, req, where);
-
/*如果没有plug控制,最终调用此接口处理队列中的请求,最终会调用请求队列的request_fn接口处理请求*/
-
__blk_run_queue(q);
-
out_unlock:
-
spin_unlock_irq(q->queue_lock);
-
}
- }