Newer
Older
/*
 *  Anticipatory & deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
 *                     Nick Piggin <nickpiggin@yahoo.com.au>
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/interrupt.h>
/*
 * Request classes. struct as_data keeps several two-element arrays and a
 * batch_data_dir field that are described in terms of REQ_SYNC / REQ_ASYNC,
 * so these values presumably double as array indices (TODO confirm against
 * the users of sort_list[] / fifo_list[]).
 */
#define REQ_SYNC 1
#define REQ_ASYNC 0
/*
 * See Documentation/block/as-iosched.txt
 */
/*
 * Maximum time before a read is submitted: HZ / 8, i.e. 1/8 of a second
 * expressed in timer ticks.
 */
#define default_read_expire (HZ / 8)
/*
 * Same for writes (HZ / 4, i.e. 1/4 of a second). These limits are not
 * hard, even if the disk is capable of satisfying them.
 */
#define default_write_expire (HZ / 4)
/*
 * read_batch_expire describes how long we will allow a stream of reads to
 * persist before looking to see whether it is time to switch over to writes.
 */
#define default_read_batch_expire (HZ / 2)
/*
 * write_batch_expire describes how long we want a stream of writes to run for.
 * This is not a hard limit, but a target we set for the auto-tuning logic.
 * The problem is that a lot of writes can be sent to the disk cache / TCQ in
 * a short amount of time...
 */
#define default_write_batch_expire (HZ / 8)
/*
 * Maximum time we may wait to anticipate a read (default around 6ms).
 * HZ / 150 truncates to 0 when HZ is below 150, so fall back to a single
 * tick in that case rather than a zero-length wait.
 */
#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1)
/*
 * Keep track of up to 20ms thinktimes. We can go as big as we like here,
 * however huge values tend to interfere and not decay fast enough. A program
 * might be in a non-io phase of operation, waiting on user input for example,
 * or doing a lengthy computation. A small penalty can be justified there, and
 * will still catch out those processes that constantly have large thinktimes.
 */
#define MAX_THINKTIME (HZ/50UL)
/*
 * Bits in as_io_context.state
 */
enum as_io_states {
	/* Process has not exited */
	AS_TASK_RUNNING   = 0,
	/* Process has started some IO */
	AS_TASK_IOSTARTED = 1,
	/* Process has completed some IO */
	AS_TASK_IORUNNING = 2,
};
/*
 * Where the scheduler currently stands with respect to anticipating a
 * follow-up request.
 */
enum anticipation_status {
	/* Not anticipating (normal operation) */
	ANTIC_OFF       = 0,
	/* The last read has not yet completed */
	ANTIC_WAIT_REQ  = 1,
	/* Currently anticipating a request vs last read (which has completed) */
	ANTIC_WAIT_NEXT = 2,
	/* Anticipating but have found a candidate or timed out */
	ANTIC_FINISHED  = 3,
};
struct as_data {
/*
* run time data
*/
struct request_queue *q; /* the "owner" queue */
/*
* requests (as_rq s) are present on both sort_list and fifo_list
*/
struct rb_root sort_list[2];
struct list_head fifo_list[2];
struct as_rq *next_arq[2]; /* next in sort order */
sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
unsigned long exit_prob; /* probability a task will exit while
being waited on */
unsigned long exit_no_coop; /* probablility an exited task will
not be part of a later cooperating
request */
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
unsigned long new_ttime_total; /* mean thinktime on new proc */
unsigned long new_ttime_mean;
u64 new_seek_total; /* mean seek on new proc */
sector_t new_seek_mean;
unsigned long current_batch_expires;
unsigned long last_check_fifo[2];
int changed_batch; /* 1: waiting for old batch to end */
int new_batch; /* 1: waiting on first read complete */
int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */
int write_batch_count; /* max # of reqs in a write batch */
int current_write_count; /* how many requests left this batch */
int write_batch_idled; /* has the write batch gone idle? */
mempool_t *arq_pool;
enum anticipation_status antic_status;
unsigned long antic_start; /* jiffies: when it started */
struct timer_list antic_timer; /* anticipatory scheduling timer */
struct work_struct antic_work; /* Deferred unplugging */
struct io_context *io_context; /* Identify the expected process */
int ioc_finished; /* IO associated with io_context is finished */
int nr_dispatched;
/*
* settings that change how the i/o scheduler behaves
*/
unsigned long fifo_expire[2];
unsigned long batch_expire[2];
unsigned long antic_expire;
};
/*
 * Given a pointer to the `fifo` list_head embedded in a struct as_rq,
 * return the containing struct as_rq.
 */
#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo)
/*
* per-request data.
*/
/*
 * Lifecycle state recorded in as_rq.state.
 */
enum arq_state {
	/* New - not referenced and not on any lists */
	AS_RQ_NEW        = 0,
	/* In the request queue. It belongs to the scheduler */
	AS_RQ_QUEUED     = 1,
	/* On the dispatch list. It belongs to the driver now */
	AS_RQ_DISPATCHED = 2,
	/*
	 * The remaining values are debug poisoning for requests being used
	 * when they shouldn't be.
	 */
	AS_RQ_PRESCHED   = 3,
	AS_RQ_REMOVED    = 4,
	AS_RQ_MERGED     = 5,
	AS_RQ_POSTSCHED  = 6,
};
/*
 * Scheduler-private bookkeeping for a single request. RQ_DATA() retrieves
 * it from a request's elevator_private pointer.
 */
struct as_rq {
/*
 * rbtree index, key is the starting offset
 */
struct rb_node rb_node;
sector_t rb_key;
/* NOTE(review): presumably the block-layer request this as_rq describes — confirm */
struct request *request;
struct io_context *io_context; /* The submitting task */
/*
 * expire fifo: list linkage (see list_entry_fifo()) and expiry time
 */
struct list_head fifo;
unsigned long expires;
/* NOTE(review): presumably REQ_SYNC or REQ_ASYNC — confirm where it is assigned */
unsigned int is_sync;
/* current lifecycle state, one of enum arq_state */
enum arq_state state;
};
/* Fetch the struct as_rq stored in a request's elevator_private pointer. */
#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private)
/* NOTE(review): presumably the cache struct as_rq objects are allocated from — confirm at the init site */
static kmem_cache_t *arq_pool;
/*
 * ioc_count is decremented by free_as_io_context(); when it drops to zero
 * and ioc_gone is non-NULL, ioc_gone is completed.
 */
static atomic_t ioc_count = ATOMIC_INIT(0);
static struct completion *ioc_gone;
/* Forward declarations; the definitions are not in this part of the file. */
static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);
static void as_antic_stop(struct as_data *ad);
/*
* IO Context helper functions
*/
/*
 * Called to deallocate the as_io_context.
 *
 * Frees @aic with kfree() and then decrements ioc_count. If that decrement
 * brings the count to zero and ioc_gone is non-NULL, ioc_gone is completed.
 */
static void free_as_io_context(struct as_io_context *aic)
{
	kfree(aic);
	if (atomic_dec_and_test(&ioc_count) && ioc_gone)
		complete(ioc_gone);
}
/*
 * Release the as_io_context attached to @ioc, if there is one, and leave
 * ioc->aic set to NULL.
 */
static void as_trim(struct io_context *ioc)
{
	struct as_io_context *attached = ioc->aic;

	if (attached != NULL)
		free_as_io_context(attached);

	ioc->aic = NULL;
}
Loading
Loading full blame...