net_sch: red: Add offload ability to RED qdisc
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on the queue length; it should be chosen larger than
        qth_max to allow packet bursts. This parameter does not affect
        the algorithm's behaviour and can be chosen arbitrarily high
        (well, less than RAM size). If RED works correctly, this limit
        will never be reached.
 */
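
/*
 * Illustrative user-space configuration (the values below are hypothetical
 * and not taken from this file); "min" and "max" correspond to
 * qth_min/qth_max before the Wlog fixed-point scaling:
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *           avpkt 1000 burst 55 ecn adaptive bandwidth 10Mbit
 */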

struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */
        unsigned char           flags;          /* TC_RED_ECN / TC_RED_HARDDROP / TC_RED_ADAPTATIVE */
        struct timer_list       adapt_timer;    /* periodic timer for adaptative RED */
        struct Qdisc            *sch;           /* back-pointer for the timer callback */
        struct red_parms        parms;          /* RED parameters (thresholds, Wlog, max_P, ...) */
        struct red_vars         vars;           /* RED state (average queue length, counters) */
        struct red_stats        stats;          /* software drop/mark statistics */
        struct Qdisc            *qdisc;         /* child qdisc holding the packets */
};

static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

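/*
 * Enqueue: recompute the average queue length from the child's backlog,
 * then let the RED action decide whether to pass the packet through,
 * ECN-mark it, or drop it (probabilistic or forced, depending on flags).
 */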
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        int ret;

        q->vars.qavg = red_calc_qavg(&q->parms,
                                     &q->vars,
                                     child->qstats.backlog);

        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
                if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                q->stats.prob_mark++;
                break;

        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
                if (red_use_harddrop(q) || !red_use_ecn(q) ||
                    !INET_ECN_set_ce(skb)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                q->stats.forced_mark++;
                break;
        }

        ret = qdisc_enqueue(skb, child, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                qdisc_qstats_backlog_inc(sch, skb);
                sch->q.qlen++;
        } else if (net_xmit_drop_count(ret)) {
                q->stats.pdrop++;
                qdisc_qstats_drop(sch);
        }
        return ret;

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb) {
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
        } else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }
        return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        red_restart(&q->vars);
}

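/*
 * Offload control path added by this change: ask the device to install or
 * remove an equivalent RED configuration via ndo_setup_tc() with the
 * TC_SETUP_QDISC_RED type. The min/max thresholds are passed with the
 * Wlog fixed-point scaling removed.
 */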
static int red_offload(struct Qdisc *sch, bool enable)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload opt = {
                .handle = sch->handle,
                .parent = sch->parent,
        };

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        if (enable) {
                opt.command = TC_RED_REPLACE;
                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
                opt.set.probability = q->parms.max_P;
                opt.set.is_ecn = red_use_ecn(q);
        } else {
                opt.command = TC_RED_DESTROY;
        }

        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}
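
/*
 * A minimal sketch of the driver side, assuming a hypothetical driver
 * (mydrv_setup_tc() and the mydrv_red_*() helpers are invented for
 * illustration; only the tc_red_qopt_offload layout and the TC_RED_*
 * commands come from this interface):
 *
 *      static int mydrv_setup_tc(struct net_device *dev,
 *                                enum tc_setup_type type, void *type_data)
 *      {
 *              struct tc_red_qopt_offload *opt = type_data;
 *
 *              if (type != TC_SETUP_QDISC_RED)
 *                      return -EOPNOTSUPP;
 *
 *              switch (opt->command) {
 *              case TC_RED_REPLACE:
 *                      return mydrv_red_install(dev, opt->handle, opt->parent,
 *                                               opt->set.min, opt->set.max,
 *                                               opt->set.probability,
 *                                               opt->set.is_ecn);
 *              case TC_RED_DESTROY:
 *                      return mydrv_red_remove(dev, opt->handle);
 *              case TC_RED_STATS:
 *                      return mydrv_red_stats(dev, opt->handle,
 *                                             opt->stats.bstats,
 *                                             opt->stats.qstats);
 *              case TC_RED_XSTATS:
 *                      return mydrv_red_xstats(dev, opt->handle, opt->xstats);
 *              default:
 *                      return -EOPNOTSUPP;
 *              }
 *      }
 */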

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        del_timer_sync(&q->adapt_timer);
        red_offload(sch, false);
        qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
};

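/*
 * Parse the netlink parameters, (re)create the child bfifo that actually
 * queues the packets, apply the RED parameters under the qdisc tree lock,
 * arm the adaptative timer if requested, and finally try to mirror the
 * new configuration into hardware.
 */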
static int red_change(struct Qdisc *sch, struct nlattr *opt)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        struct tc_red_qopt *ctl;
        struct Qdisc *child = NULL;
        int err;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_RED_PARMS]);

        if (ctl->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
                if (IS_ERR(child))
                        return PTR_ERR(child);
        }

        if (child != &noop_qdisc)
                qdisc_hash_add(child, true);
        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->limit = ctl->limit;
        if (child) {
                qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
                                          q->qdisc->qstats.backlog);
                qdisc_destroy(q->qdisc);
                q->qdisc = child;
        }

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_RED_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        del_timer(&q->adapt_timer);
        if (ctl->flags & TC_RED_ADAPTATIVE)
                mod_timer(&q->adapt_timer, jiffies + HZ/2);

        if (!q->qdisc->q.qlen)
                red_start_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        red_offload(sch, true);
        return 0;
}

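/*
 * Adaptative RED: when TC_RED_ADAPTATIVE is set, this timer fires every
 * 500 ms and lets red_adaptative_algo() re-tune max_P from the observed
 * average queue length.
 */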
static inline void red_adaptative_timer(struct timer_list *t)
{
        struct red_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        red_adaptative_algo(&q->parms, &q->vars);
        mod_timer(&q->adapt_timer, jiffies + HZ/2);
        spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct red_sched_data *q = qdisc_priv(sch);

        q->qdisc = &noop_qdisc;
        q->sch = sch;
        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
        return red_change(sch, opt);
}

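/*
 * If the qdisc is offloaded, refresh bstats/qstats from the device and set
 * TC_RED_OFFLOADED in the dumped flags; -EOPNOTSUPP from the driver simply
 * means the qdisc is not offloaded.
 */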
static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
{
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload hw_stats = {
                .handle = sch->handle,
                .parent = sch->parent,
                .command = TC_RED_STATS,
                .stats.bstats = &sch->bstats,
                .stats.qstats = &sch->qstats,
        };
        int err;

        opt->flags &= ~TC_RED_OFFLOADED;
        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return 0;

        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                            &hw_stats);
        if (err == -EOPNOTSUPP)
                return 0;

        if (!err)
                opt->flags |= TC_RED_OFFLOADED;

        return err;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };
        int err;

        sch->qstats.backlog = q->qdisc->qstats.backlog;
        err = red_dump_offload(sch, &opt);
        if (err)
                goto nla_put_failure;

        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

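/*
 * Extended statistics: start from the software counters and, when the
 * device answers a TC_RED_XSTATS request, add the hardware-side drop and
 * mark counters on top.
 */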
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_xstats st = {
                .early  = q->stats.prob_drop + q->stats.forced_drop,
                .pdrop  = q->stats.pdrop,
                .other  = q->stats.other,
                .marked = q->stats.prob_mark + q->stats.forced_mark,
        };

        if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
                struct red_stats hw_stats = {0};
                struct tc_red_qopt_offload hw_stats_request = {
                        .handle = sch->handle,
                        .parent = sch->parent,
                        .command = TC_RED_XSTATS,
                        .xstats = &hw_stats,
                };
                if (!dev->netdev_ops->ndo_setup_tc(dev,
                                                   TC_SETUP_QDISC_RED,
                                                   &hw_stats_request)) {
                        st.early += hw_stats.prob_drop + hw_stats.forced_drop;
                        st.pdrop += hw_stats.pdrop;
                        st.other += hw_stats.other;
                        st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
                }
        }

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");