include/linux/blk-cgroup.h

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 #ifndef _BLK_CGROUP_H
   3 #define _BLK_CGROUP_H
   4 /*
   5  * Common Block IO controller cgroup interface
   6  *
   7  * Based on ideas and code from CFQ, CFS and BFQ:
   8  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
   9  *
  10  * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
  11  *                    Paolo Valente <paolo.valente@unimore.it>
  12  *
  13  * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
  14  *                    Nauman Rafique <nauman@google.com>
  15  */
  16
  17 #include <linux/cgroup.h>
  18 #include <linux/percpu_counter.h>
  19 #include <linux/seq_file.h>
  20 #include <linux/radix-tree.h>
  21 #include <linux/blkdev.h>
  22 #include <linux/atomic.h>
  23 #include <linux/kthread.h>
  24
  25 /*
      * percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter.
      * Passed as the batch argument of percpu_counter_add_batch() by
      * blkg_stat_add() and blkg_rwstat_add().
      */
  26 #define BLKG_STAT_CPU_BATCH     (INT_MAX / 2)
  27
  28 /* Max limits for throttle policy */
  29 #define THROTL_IOPS_MAX         UINT_MAX
  30
  31 #ifdef CONFIG_BLK_CGROUP
  32
  33 enum blkg_rwstat_type {
              /*
               * Indices into blkg_rwstat->cpu_cnt[] and ->aux_cnt[].
               * blkg_rwstat_add() bumps exactly one of READ/WRITE/DISCARD
               * and exactly one of SYNC/ASYNC per call.
               */
  34         BLKG_RWSTAT_READ,
  35         BLKG_RWSTAT_WRITE,
  36         BLKG_RWSTAT_SYNC,
  37         BLKG_RWSTAT_ASYNC,
  38         BLKG_RWSTAT_DISCARD,
  39
              /* number of counters above; sizes the blkg_rwstat arrays */
  40         BLKG_RWSTAT_NR,
              /* same value as BLKG_RWSTAT_NR */
  41         BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
  42 };
  43
  44 struct blkcg_gq;
  45
  46 struct blkcg {
              /* embedded css; css_to_blkcg() maps back via container_of() */
  47         struct cgroup_subsys_state      css;
  48         spinlock_t                      lock;
  49
  50         struct radix_tree_root          blkg_tree;
              /* lookup hint, read first by __blkg_lookup() via rcu_dereference() */
  51         struct blkcg_gq __rcu           *blkg_hint;
  52         struct hlist_head               blkg_list;
  53
              /* per-policy blkcg data, indexed by blkcg_policy->plid */
  54         struct blkcg_policy_data        *cpd[BLKCG_MAX_POLS];
  55
  56         struct list_head                all_blkcgs_node;
  57 #ifdef CONFIG_CGROUP_WRITEBACK
  58         struct list_head                cgwb_list;
  59 #endif
  60 };
  61
  62 /*
  63  * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
  64  * recursive.  Used to carry stats of dead children, and, for blkg_rwstat,
  65  * to carry result values from read and sum operations.
      *
      * cpu_cnt is the percpu_counter updated by blkg_[rw]stat_add(); for
      * blkg_rwstat both arrays are indexed by enum blkg_rwstat_type.
  66  */
  67 struct blkg_stat {
  68         struct percpu_counter           cpu_cnt;
  69         atomic64_t                      aux_cnt;
  70 };
  71
  72 struct blkg_rwstat {
  73         struct percpu_counter           cpu_cnt[BLKG_RWSTAT_NR];
  74         atomic64_t                      aux_cnt[BLKG_RWSTAT_NR];
  75 };
  76
  77 /*
  78  * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
  79  * request_queue (q).  This is used by blkcg policies which need to track
  80  * information per blkcg - q pair.
  81  *
  82  * There can be multiple active blkcg policies and each blkg:policy pair is
  83  * represented by a blkg_policy_data which is allocated and freed by each
  84  * policy's pd_alloc/free_fn() methods.  A policy can allocate a private data
  85  * area by allocating a larger data structure which embeds blkg_policy_data
  86  * at the beginning.
  87  */
  88 struct blkg_policy_data {
  89         /* the blkg and policy id this per-policy data belongs to */
  90         struct blkcg_gq                 *blkg;
  91         int                             plid;
  92 };
  93
  94 /*
  95  * Policies that need to keep per-blkcg data which is independent from any
  96  * request_queue associated to it should implement cpd_alloc/free_fn()
  97  * methods.  A policy can allocate a private data area by allocating a larger
  98  * data structure which embeds blkcg_policy_data at the beginning.
  99  * cpd_init_fn() is invoked to let each policy handle per-blkcg data.
 100  */
 101 struct blkcg_policy_data {
 102         /* the blkcg and policy id this per-policy data belongs to */
 103         struct blkcg                    *blkcg;
 104         int                             plid;
 105 };
 106
 107 /* association between a blk cgroup and a request queue */
 108 struct blkcg_gq {
 109         /* Pointer to the associated request_queue */
 110         struct request_queue            *q;
 111         struct list_head                q_node;
 112         struct hlist_node               blkcg_node;
 113         struct blkcg                    *blkcg;
 114
 115         /*
 116          * Each blkg gets congested separately and the congestion state is
 117          * propagated to the matching bdi_writeback_congested.
 118          */
 119         struct bdi_writeback_congested  *wb_congested;
 120
 121         /* all non-root blkcg_gq's are guaranteed to have access to parent */
 122         struct blkcg_gq                 *parent;
 123
 124         /* request allocation list for this blkcg-q pair */
 125         struct request_list             rl;
 126
 127         /* reference count, see blkg_get(), blkg_try_get() and blkg_put() */
 128         atomic_t                        refcnt;
 129
 130         /* is this blkg online? protected by both blkcg and q locks */
 131         bool                            online;
 132
              /* byte and IO counters bumped by blkcg_bio_issue_check() */
 133         struct blkg_rwstat              stat_bytes;
 134         struct blkg_rwstat              stat_ios;
 135
              /* per-policy data, indexed by blkcg_policy->plid */
 136         struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
 137
              /* passed to call_rcu() by blkg_put() on the final put */
 138         struct rcu_head                 rcu_head;
 139
              /*
               * use_delay is adjusted by blkcg_use_delay(), blkcg_unuse_delay()
               * and blkcg_clear_delay(); its 0 <-> non-zero transitions inc/dec
               * the owning cgroup's congestion_count.
               */
 140         atomic_t                        use_delay;
 141         atomic64_t                      delay_nsec;
 142         atomic64_t                      delay_start;
 143         u64                             last_delay;
 144         int                             last_use;
 145 };
 146
 147 typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
      /* the *_cpd_fn types operate on per-blkcg data (struct blkcg_policy_data) */
 148 typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
 149 typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
 150 typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
      /* the *_pd_fn types operate on per-blkg data (struct blkg_policy_data) */
 151 typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
 152 typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
 153 typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 154 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
 155 typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
 156 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
 157 typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
 158                                       size_t size);
 159
 160 struct blkcg_policy {
              /* policy id; index into blkcg_gq->pd[] and blkcg->cpd[] */
 161         int                             plid;
 162         /* cgroup files for the policy */
 163         struct cftype                   *dfl_cftypes;
 164         struct cftype                   *legacy_cftypes;
 165
 166         /* operations */
              /* per-blkcg data (struct blkcg_policy_data) callbacks */
 167         blkcg_pol_alloc_cpd_fn          *cpd_alloc_fn;
 168         blkcg_pol_init_cpd_fn           *cpd_init_fn;
 169         blkcg_pol_free_cpd_fn           *cpd_free_fn;
 170         blkcg_pol_bind_cpd_fn           *cpd_bind_fn;
 171
              /* per-blkg data (struct blkg_policy_data) callbacks */
 172         blkcg_pol_alloc_pd_fn           *pd_alloc_fn;
 173         blkcg_pol_init_pd_fn            *pd_init_fn;
 174         blkcg_pol_online_pd_fn          *pd_online_fn;
 175         blkcg_pol_offline_pd_fn         *pd_offline_fn;
 176         blkcg_pol_free_pd_fn            *pd_free_fn;
 177         blkcg_pol_reset_pd_stats_fn     *pd_reset_stats_fn;
 178         blkcg_pol_stat_pd_fn            *pd_stat_fn;
 179 };
 180
 181 extern struct blkcg blkcg_root;
 182 extern struct cgroup_subsys_state * const blkcg_root_css;
 183
      /* fallback used by __blkg_lookup() when the blkg_hint does not match */
 184 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
 185                                       struct request_queue *q, bool update_hint);
      /* blkcg_bio_issue_check() checks the result with IS_ERR() */
 186 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 187                                     struct request_queue *q);
 188 int blkcg_init_queue(struct request_queue *q);
 189 void blkcg_drain_queue(struct request_queue *q);
 190 void blkcg_exit_queue(struct request_queue *q);
 191
 192 /* Blkio controller policy registration */
 193 int blkcg_policy_register(struct blkcg_policy *pol);
 194 void blkcg_policy_unregister(struct blkcg_policy *pol);
 195 int blkcg_activate_policy(struct request_queue *q,
 196                           const struct blkcg_policy *pol);
 197 void blkcg_deactivate_policy(struct request_queue *q,
 198                              const struct blkcg_policy *pol);
 199
 200 const char *blkg_dev_name(struct blkcg_gq *blkg);
      /* helpers below write into a seq_file; defined out of line */
 201 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
 202                        u64 (*prfill)(struct seq_file *,
 203                                      struct blkg_policy_data *, int),
 204                        const struct blkcg_policy *pol, int data,
 205                        bool show_total);
 206 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
 207 u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 208                          const struct blkg_rwstat *rwstat);
 209 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
 210 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 211                        int off);
 212 int blkg_print_stat_bytes(struct seq_file *sf, void *v);
 213 int blkg_print_stat_ios(struct seq_file *sf, void *v);
 214 int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
 215 int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
 216
 217 u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
 218                             struct blkcg_policy *pol, int off);
 219 struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
 220                                              struct blkcg_policy *pol, int off);
 221
      /* context object passed to blkg_conf_prep() and blkg_conf_finish() */
 222 struct blkg_conf_ctx {
 223         struct gendisk                  *disk;
 224         struct blkcg_gq                 *blkg;
 225         char                            *body;
 226 };
 227
 228 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 229                    char *input, struct blkg_conf_ctx *ctx);
 230 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 231
 231
 232
 233 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 234 {
 235         return css ? container_of(css, struct blkcg, css) : NULL;
 236 }
 237
 238 static inline struct blkcg *bio_blkcg(struct bio *bio)
 239 {
 240         struct cgroup_subsys_state *css;
 241
 242         if (bio && bio->bi_css)
 243                 return css_to_blkcg(bio->bi_css);
 244         css = kthread_blkcg();
 245         if (css)
 246                 return css_to_blkcg(css);
 247         return css_to_blkcg(task_css(current, io_cgrp_id));
 248 }
 249
 250 static inline bool blk_cgroup_congested(void)
 251 {
 252         struct cgroup_subsys_state *css;
 253         bool ret = false;
 254
 255         rcu_read_lock();
 256         css = kthread_blkcg();
 257         if (!css)
 258                 css = task_css(current, io_cgrp_id);
 259         while (css) {
 260                 if (atomic_read(&css->cgroup->congestion_count)) {
 261                         ret = true;
 262                         break;
 263                 }
 264                 css = css->parent;
 265         }
 266         rcu_read_unlock();
 267         return ret;
 268 }
 269
 270 /**
 271  * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 272  * @return: true if this bio needs to be submitted with the root blkg context.
 273  *
 274  * In order to avoid priority inversions we sometimes need to issue a bio as if
 275  * it were attached to the root blkg, and then backcharge to the actual owning
 276  * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
 277  * bio and attach the appropriate blkg to the bio.  Then we call this helper and
 278  * if it is true run with the root blkg for that queue and then do any
 279  * backcharging to the originating cgroup once the io is complete.
 280  */
 281 static inline bool bio_issue_as_root_blkg(struct bio *bio)
 282 {
 283         return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
 284 }
 285
 286 /**
 287  * blkcg_parent - get the parent of a blkcg
 288  * @blkcg: blkcg of interest
 289  *
 290  * Return the parent blkcg of @blkcg.  Can be called anytime.
 291  */
 292 static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 293 {
 294         return css_to_blkcg(blkcg->css.parent);
 295 }
 296
 297 /**
 298  * __blkg_lookup - internal version of blkg_lookup()
 299  * @blkcg: blkcg of interest
 300  * @q: request_queue of interest
 301  * @update_hint: whether to update lookup hint with the result or not
 302  *
 303  * This is internal version and shouldn't be used by policy
 304  * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 305  * @q's bypass state.  If @update_hint is %true, the caller should be
 306  * holding @q->queue_lock and lookup hint is updated on success.
 307  */
 308 static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
 309                                              struct request_queue *q,
 310                                              bool update_hint)
 311 {
 312         struct blkcg_gq *blkg;
 313
 314         if (blkcg == &blkcg_root)
 315                 return q->root_blkg;
 316
 317         blkg = rcu_dereference(blkcg->blkg_hint);
 318         if (blkg && blkg->q == q)
 319                 return blkg;
 320
 321         return blkg_lookup_slowpath(blkcg, q, update_hint);
 322 }
 323
 324 /**
 325  * blkg_lookup - lookup blkg for the specified blkcg - q pair
 326  * @blkcg: blkcg of interest
 327  * @q: request_queue of interest
 328  *
 329  * Lookup blkg for the @blkcg - @q pair.  This function should be called
 330  * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
 331  * - see blk_queue_bypass_start() for details.
 332  */
 333 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 334                                            struct request_queue *q)
 335 {
 336         WARN_ON_ONCE(!rcu_read_lock_held());
 337
 338         if (unlikely(blk_queue_bypass(q)))
 339                 return NULL;
 340         return __blkg_lookup(blkcg, q, false);
 341 }
 342
 343 /**
 344  * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 345  * @q: request_queue of interest
 346  *
 347  * Lookup blkg for @q at the root level. See also blkg_lookup().
 348  */
 349 static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
 350 {
 351         return q->root_blkg;
 352 }
 353
 354 /**
 355  * blkg_to_pdata - get policy private data
 356  * @blkg: blkg of interest
 357  * @pol: policy of interest
 358  *
 359  * Return pointer to private data associated with the @blkg-@pol pair.
 360  */
 361 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
 362                                                   struct blkcg_policy *pol)
 363 {
 364         return blkg ? blkg->pd[pol->plid] : NULL;
 365 }
 366
 367 static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
 368                                                      struct blkcg_policy *pol)
 369 {
 370         return blkcg ? blkcg->cpd[pol->plid] : NULL;
 371 }
 372
 373 /**
 374  * pdata_to_blkg - get blkg associated with policy private data
 375  * @pd: policy private data of interest
 376  *
 377  * @pd is policy private data.  Determine the blkg it's associated with.
 378  */
 379 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
 380 {
 381         return pd ? pd->blkg : NULL;
 382 }
 383
 384 static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
 385 {
 386         return cpd ? cpd->blkcg : NULL;
 387 }
 388
 389 /**
 390  * blkg_path - format cgroup path of blkg
 391  * @blkg: blkg of interest
 392  * @buf: target buffer
 393  * @buflen: target buffer length
 394  *
 395  * Format the path of the cgroup of @blkg into @buf.
 396  */
 397 static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
 398 {
 399         return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
 400 }
 401
 402 /**
 403  * blkg_get - get a blkg reference
 404  * @blkg: blkg to get
 405  *
 406  * The caller should be holding an existing reference.
 407  */
 408 static inline void blkg_get(struct blkcg_gq *blkg)
 409 {
 410         WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
 411         atomic_inc(&blkg->refcnt);
 412 }
 413
 414 /**
 415  * blkg_try_get - try and get a blkg reference
 416  * @blkg: blkg to get
 417  *
 418  * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
 419  * of freeing this blkg, so we can only use it if the refcnt is not zero.
 420  */
 421 static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
 422 {
 423         if (atomic_inc_not_zero(&blkg->refcnt))
 424                 return blkg;
 425         return NULL;
 426 }
 427
 428
 429 void __blkg_release_rcu(struct rcu_head *rcu);
 430
 431 /**
 432  * blkg_put - put a blkg reference
 433  * @blkg: blkg to put
 434  */
 435 static inline void blkg_put(struct blkcg_gq *blkg)
 436 {
 437         WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
 438         if (atomic_dec_and_test(&blkg->refcnt))
 439                 call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 440 }
 441
 442 /**
 443  * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 444  * @d_blkg: loop cursor pointing to the current descendant
 445  * @pos_css: used for iteration
 446  * @p_blkg: target blkg to walk descendants of
 447  *
 448  * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 449  * read locked.  If called under either blkcg or queue lock, the iteration
 450  * is guaranteed to include all and only online blkgs.  The caller may
 451  * update @pos_css by calling css_rightmost_descendant() to skip subtree.
 452  * @p_blkg is included in the iteration and the first node to be visited.
      *
      * css positions for which __blkg_lookup() finds no blkg on @p_blkg's
      * queue are skipped.
 453  */
 454 #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)           \
 455         css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)   \
 456                 if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),    \
 457                                               (p_blkg)->q, false)))
 458
 459 /**
 460  * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 461  * @d_blkg: loop cursor pointing to the current descendant
 462  * @pos_css: used for iteration
 463  * @p_blkg: target blkg to walk descendants of
 464  *
 465  * Similar to blkg_for_each_descendant_pre() but performs post-order
 466  * traversal instead.  Synchronization rules are the same.  @p_blkg is
 467  * included in the iteration and the last node to be visited.
      *
      * css positions for which __blkg_lookup() finds no blkg on @p_blkg's
      * queue are skipped.
 468  */
 469 #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)          \
 470         css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)  \
 471                 if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),    \
 472                                               (p_blkg)->q, false)))
 473
 474 /**
 475  * blk_get_rl - get request_list to use
 476  * @q: request_queue of interest
 477  * @bio: bio which will be attached to the allocated request (may be %NULL)
 478  *
 479  * The caller wants to allocate a request from @q to use for @bio.  Find
 480  * the request_list to use and obtain a reference on it.  Should be called
 481  * under queue_lock.  This function is guaranteed to return non-%NULL
 482  * request_list.
 483  */
 484 static inline struct request_list *blk_get_rl(struct request_queue *q,
 485                                               struct bio *bio)
 486 {
 487         struct blkcg *blkcg;
 488         struct blkcg_gq *blkg;
 489
 490         rcu_read_lock();
 491
 492         blkcg = bio_blkcg(bio);
 493
 494         /* bypass blkg lookup and use @q->root_rl directly for root */
 495         if (blkcg == &blkcg_root)
 496                 goto root_rl;
 497
 498         /*
 499          * Try to use blkg->rl.  blkg lookup may fail under memory pressure
 500          * or if either the blkcg or queue is going away.  Fall back to
 501          * root_rl in such cases.
 502          */
 503         blkg = blkg_lookup(blkcg, q);
 504         if (unlikely(!blkg))
 505                 goto root_rl;
 506
 507         blkg_get(blkg);
 508         rcu_read_unlock();
 509         return &blkg->rl;
 510 root_rl:
 511         rcu_read_unlock();
 512         return &q->root_rl;
 513 }
 514
 515 /**
 516  * blk_put_rl - put request_list
 517  * @rl: request_list to put
 518  *
 519  * Put the reference acquired by blk_get_rl().  Should be called under
 520  * queue_lock.
 521  */
 522 static inline void blk_put_rl(struct request_list *rl)
 523 {
 524         if (rl->blkg->blkcg != &blkcg_root)
 525                 blkg_put(rl->blkg);
 526 }
 527
 528 /**
 529  * blk_rq_set_rl - associate a request with a request_list
 530  * @rq: request of interest
 531  * @rl: target request_list
 532  *
 533  * Associate @rq with @rl so that accounting and freeing can know the
 534  * request_list @rq came from.
 535  */
 536 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
 537 {
 538         rq->rl = rl;
 539 }
 540
 541 /**
 542  * blk_rq_rl - return the request_list a request came from
 543  * @rq: request of interest
 544  *
 545  * Return the request_list @rq is allocated from.
 546  */
 547 static inline struct request_list *blk_rq_rl(struct request *rq)
 548 {
 549         return rq->rl;
 550 }
 551
 552 struct request_list *__blk_queue_next_rl(struct request_list *rl,
 553                                          struct request_queue *q);
 554 /**
 555  * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
      * @rl: loop cursor, a struct request_list pointer
      * @q: request_queue whose request_lists are walked
 556  *
      * Starts at @q->root_rl and advances with __blk_queue_next_rl() until it
      * returns %NULL.
      *
 557  * Should be used under queue_lock.
 558  */
 559 #define blk_queue_for_each_rl(rl, q)    \
 560         for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
 561
 562 static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
 563 {
 564         int ret;
 565
 566         ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
 567         if (ret)
 568                 return ret;
 569
 570         atomic64_set(&stat->aux_cnt, 0);
 571         return 0;
 572 }
 573
 574 static inline void blkg_stat_exit(struct blkg_stat *stat)
 575 {
 576         percpu_counter_destroy(&stat->cpu_cnt);
 577 }
 578
 579 /**
 580  * blkg_stat_add - add a value to a blkg_stat
 581  * @stat: target blkg_stat
 582  * @val: value to add
 583  *
 584  * Add @val to @stat.  The caller must ensure that IRQ on the same CPU
 585  * don't re-enter this function for the same counter.
 586  */
 587 static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
 588 {
 589         percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
 590 }
 591
 592 /**
 593  * blkg_stat_read - read the current value of a blkg_stat
 594  * @stat: blkg_stat to read
 595  */
 596 static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
 597 {
 598         return percpu_counter_sum_positive(&stat->cpu_cnt);
 599 }
 600
 601 /**
 602  * blkg_stat_reset - reset a blkg_stat
 603  * @stat: blkg_stat to reset
 604  */
 605 static inline void blkg_stat_reset(struct blkg_stat *stat)
 606 {
 607         percpu_counter_set(&stat->cpu_cnt, 0);
 608         atomic64_set(&stat->aux_cnt, 0);
 609 }
 610
 611 /**
 612  * blkg_stat_add_aux - add a blkg_stat into another's aux count
 613  * @to: the destination blkg_stat
 614  * @from: the source
 615  *
 616  * Add @from's count including the aux one to @to's aux count.
 617  */
 618 static inline void blkg_stat_add_aux(struct blkg_stat *to,
 619                                      struct blkg_stat *from)
 620 {
 621         atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
 622                      &to->aux_cnt);
 623 }
 624
 625 static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
 626 {
 627         int i, ret;
 628
 629         for (i = 0; i < BLKG_RWSTAT_NR; i++) {
 630                 ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
 631                 if (ret) {
 632                         while (--i >= 0)
 633                                 percpu_counter_destroy(&rwstat->cpu_cnt[i]);
 634                         return ret;
 635                 }
 636                 atomic64_set(&rwstat->aux_cnt[i], 0);
 637         }
 638         return 0;
 639 }
 640
 641 static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
 642 {
 643         int i;
 644
 645         for (i = 0; i < BLKG_RWSTAT_NR; i++)
 646                 percpu_counter_destroy(&rwstat->cpu_cnt[i]);
 647 }
 648
 649 /**
 650  * blkg_rwstat_add - add a value to a blkg_rwstat
 651  * @rwstat: target blkg_rwstat
 652  * @op: REQ_OP and flags
 653  * @val: value to add
 654  *
 655  * Add @val to @rwstat.  The counters are chosen according to @rw.  The
 656  * caller is responsible for synchronizing calls to this function.
 657  */
 658 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 659                                    unsigned int op, uint64_t val)
 660 {
 661         struct percpu_counter *cnt;
 662
 663         if (op_is_discard(op))
 664                 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
 665         else if (op_is_write(op))
 666                 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
 667         else
 668                 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
 669
 670         percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
 671
 672         if (op_is_sync(op))
 673                 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 674         else
 675                 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
 676
 677         percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
 678 }
 679
 680 /**
 681  * blkg_rwstat_read - read the current values of a blkg_rwstat
 682  * @rwstat: blkg_rwstat to read
 683  *
 684  * Read the current snapshot of @rwstat and return it in the aux counts.
 685  */
 686 static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
 687 {
 688         struct blkg_rwstat result;
 689         int i;
 690
 691         for (i = 0; i < BLKG_RWSTAT_NR; i++)
 692                 atomic64_set(&result.aux_cnt[i],
 693                              percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
 694         return result;
 695 }
 696
 697 /**
 698  * blkg_rwstat_total - read the total count of a blkg_rwstat
 699  * @rwstat: blkg_rwstat to read
 700  *
 701  * Return the total count of @rwstat regardless of the IO direction.  This
 702  * function can be called without synchronization and takes care of u64
 703  * atomicity.
 704  */
 705 static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
 706 {
 707         struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
 708
 709         return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
 710                 atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
 711 }
 712
 713 /**
 714  * blkg_rwstat_reset - reset a blkg_rwstat
 715  * @rwstat: blkg_rwstat to reset
 716  */
 717 static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
 718 {
 719         int i;
 720
 721         for (i = 0; i < BLKG_RWSTAT_NR; i++) {
 722                 percpu_counter_set(&rwstat->cpu_cnt[i], 0);
 723                 atomic64_set(&rwstat->aux_cnt[i], 0);
 724         }
 725 }
 726
 727 /**
 728  * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 729  * @to: the destination blkg_rwstat
 730  * @from: the source
 731  *
 732  * Add @from's count including the aux one to @to's aux count.
 733  */
 734 static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
 735                                        struct blkg_rwstat *from)
 736 {
 737         u64 sum[BLKG_RWSTAT_NR];
 738         int i;
 739
 740         for (i = 0; i < BLKG_RWSTAT_NR; i++)
 741                 sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
 742
 743         for (i = 0; i < BLKG_RWSTAT_NR; i++)
 744                 atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
 745                              &to->aux_cnt[i]);
 746 }
 747
 748 #ifdef CONFIG_BLK_DEV_THROTTLING
      /* a true result is treated as "throttled" by blkcg_bio_issue_check() */
 749 extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 750                            struct bio *bio);
 751 #else
      /* stub used without CONFIG_BLK_DEV_THROTTLING: always returns false */
 752 static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 753                                   struct bio *bio) { return false; }
 754 #endif
 755
 756 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 757                                          struct bio *bio)
 758 {
 759         struct blkcg *blkcg;
 760         struct blkcg_gq *blkg;
 761         bool throtl = false;
 762
 763         rcu_read_lock();
 764         blkcg = bio_blkcg(bio);
 765
 766         /* associate blkcg if bio hasn't attached one */
 767         bio_associate_blkcg(bio, &blkcg->css);
 768
 769         blkg = blkg_lookup(blkcg, q);
 770         if (unlikely(!blkg)) {
 771                 spin_lock_irq(q->queue_lock);
 772                 blkg = blkg_lookup_create(blkcg, q);
 773                 if (IS_ERR(blkg))
 774                         blkg = NULL;
 775                 spin_unlock_irq(q->queue_lock);
 776         }
 777
 778         throtl = blk_throtl_bio(q, blkg, bio);
 779
 780         if (!throtl) {
 781                 blkg = blkg ?: q->root_blkg;
 782                 /*
 783                  * If the bio is flagged with BIO_QUEUE_ENTERED it means this
 784                  * is a split bio and we would have already accounted for the
 785                  * size of the bio.
 786                  */
 787                 if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
 788                         blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
 789                                         bio->bi_iter.bi_size);
 790                 blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 791         }
 792
 793         rcu_read_unlock();
 794         return !throtl;
 795 }
 796
 797 static inline void blkcg_use_delay(struct blkcg_gq *blkg)
 798 {
 799         if (atomic_add_return(1, &blkg->use_delay) == 1)
 800                 atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
 801 }
 802
 803 static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
 804 {
 805         int old = atomic_read(&blkg->use_delay);
 806
 807         if (old == 0)
 808                 return 0;
 809
 810         /*
 811          * We do this song and dance because we can race with somebody else
 812          * adding or removing delay.  If we just did an atomic_dec we'd end up
 813          * negative and we'd already be in trouble.  We need to subtract 1 and
 814          * then check to see if we were the last delay so we can drop the
 815          * congestion count on the cgroup.
 816          */
 817         while (old) {
 818                 int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
 819                 if (cur == old)
 820                         break;
 821                 old = cur;
 822         }
 823
 824         if (old == 0)
 825                 return 0;
 826         if (old == 1)
 827                 atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
 828         return 1;
 829 }
 830
 831 static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
 832 {
 833         int old = atomic_read(&blkg->use_delay);
 834         if (!old)
 835                 return;
 836         /* We only want 1 person clearing the congestion count for this blkg. */
 837         while (old) {
 838                 int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
 839                 if (cur == old) {
 840                         atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
 841                         break;
 842                 }
 843                 old = cur;
 844         }
 845 }
 846
 847 void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
      /* the two below also have empty inline stubs in the !CONFIG_BLK_CGROUP branch */
 848 void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
 849 void blkcg_maybe_throttle_current(void);
 850 #else   /* CONFIG_BLK_CGROUP */
 851
 852 struct blkcg {
              /* empty placeholder, used when CONFIG_BLK_CGROUP is disabled */
 853 };
 854
 855 struct blkg_policy_data {
              /* empty placeholder, used when CONFIG_BLK_CGROUP is disabled */
 856 };
 857
 858 struct blkcg_policy_data {
              /* empty placeholder, used when CONFIG_BLK_CGROUP is disabled */
 859 };
 860
 861 struct blkcg_gq {
              /* empty placeholder, used when CONFIG_BLK_CGROUP is disabled */
 862 };
 863
 864 struct blkcg_policy {
              /* empty placeholder, used when CONFIG_BLK_CGROUP is disabled */
 865 };
 866
 867 #define blkcg_root_css  ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
 868
      /* !CONFIG_BLK_CGROUP stubs: nothing to throttle, never congested */
 869 static inline void blkcg_maybe_throttle_current(void) { }
 870 static inline bool blk_cgroup_congested(void) { return false; }
 871
 872 #ifdef CONFIG_BLOCK
 873
 874 static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
 875
      /*
       * Stubs for CONFIG_BLOCK without CONFIG_BLK_CGROUP: lookups return
       * NULL, init/register/activate report success (0), the rest do nothing.
       */
 876 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
 877 static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
 878 { return NULL; }
 879 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 880 static inline void blkcg_drain_queue(struct request_queue *q) { }
 881 static inline void blkcg_exit_queue(struct request_queue *q) { }
 882 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
 883 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
 884 static inline int blkcg_activate_policy(struct request_queue *q,
 885                                         const struct blkcg_policy *pol) { return 0; }
 886 static inline void blkcg_deactivate_policy(struct request_queue *q,
 887                                            const struct blkcg_policy *pol) { }
 888
 889 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 890
 891 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
 892                                                   struct blkcg_policy *pol) { return NULL; }
 893 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
      /*
       * NOTE(review): this stub takes one argument and returns char *, while
       * the CONFIG_BLK_CGROUP version is int blkg_path(blkg, buf, buflen) -
       * confirm no caller relies on the three-argument form in this config.
       */
 894 static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
 895 static inline void blkg_get(struct blkcg_gq *blkg) { }
 896 static inline void blkg_put(struct blkcg_gq *blkg) { }
 897
      /* without blkcg every request uses the queue's root_rl */
 898 static inline struct request_list *blk_get_rl(struct request_queue *q,
 899                                               struct bio *bio) { return &q->root_rl; }
 900 static inline void blk_put_rl(struct request_list *rl) { }
 901 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 902 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 903
      /* always true, i.e. the bio is never reported as throttled */
 904 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 905                                          struct bio *bio) { return true; }
 906
      /* visits only @q->root_rl, then terminates */
 907 #define blk_queue_for_each_rl(rl, q)    \
 908         for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
 909
 910 #endif  /* CONFIG_BLOCK */
 911 #endif  /* CONFIG_BLK_CGROUP */
 912 #endif  /* _BLK_CGROUP_H */