// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 Thomas Gleixner.
 * Copyright (C) 2016-2017 Christoph Hellwig.
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
				unsigned int cpus_per_vec)
{
	const struct cpumask *siblmsk;
	int cpu, sibl;

	for ( ; cpus_per_vec > 0; ) {
		cpu = cpumask_first(nmsk);

		/* Should not happen, but I'm too lazy to think about it */
		if (cpu >= nr_cpu_ids)
			return;

		cpumask_clear_cpu(cpu, nmsk);
		cpumask_set_cpu(cpu, irqmsk);
		cpus_per_vec--;

		/* If the cpu has siblings, use them first */
		siblmsk = topology_sibling_cpumask(cpu);
		for (sibl = -1; cpus_per_vec > 0; ) {
			sibl = cpumask_next(sibl, siblmsk);
			if (sibl >= nr_cpu_ids)
				break;
			if (!cpumask_test_and_clear_cpu(sibl, nmsk))
				continue;
			cpumask_set_cpu(sibl, irqmsk);
			cpus_per_vec--;
		}
	}
}
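/*
 * Illustrative example (not part of the original source): assume a
 * hypothetical node mask nmsk = {0,1,2,3} where CPUs 0/1 and 2/3 are
 * hyperthread siblings, and cpus_per_vec = 2.  irq_spread_init_one()
 * first picks CPU 0, then prefers its sibling CPU 1 over CPU 2, so the
 * vector mask becomes {0,1} and nmsk is left as {2,3} for the next
 * vector.  Grouping siblings on the same vector keeps a queue's
 * interrupt work on one physical core.
 */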
static cpumask_var_t *alloc_node_to_cpumask(void)
{
	cpumask_var_t *masks;
	int node;

	masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
	if (!masks)
		return NULL;

	for (node = 0; node < nr_node_ids; node++) {
		if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
			goto out_unwind;
	}

	return masks;

out_unwind:
	while (--node >= 0)
		free_cpumask_var(masks[node]);
	kfree(masks);
	return NULL;
}

static void free_node_to_cpumask(cpumask_var_t *masks)
{
	int node;

	for (node = 0; node < nr_node_ids; node++)
		free_cpumask_var(masks[node]);
	kfree(masks);
}

static void build_node_to_cpumask(cpumask_var_t *masks)
{
	int cpu;

	for_each_possible_cpu(cpu)
		cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
}
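/*
 * Example (hypothetical topology, for illustration only): on a two node
 * machine with possible CPUs 0-7, where CPUs 0-3 sit on node 0 and CPUs
 * 4-7 on node 1, build_node_to_cpumask() yields:
 *
 *	masks[0] = 0-3
 *	masks[1] = 4-7
 *
 * This per-node lookup table is what the spreading code below uses to
 * keep each vector's CPUs on a single NUMA node where possible.
 */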
static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
				const struct cpumask *mask, nodemask_t *nodemsk)
{
	int n, nodes = 0;

	/* Calculate the number of nodes in the supplied affinity mask */
	for_each_node(n) {
		if (cpumask_intersects(mask, node_to_cpumask[n])) {
			node_set(n, *nodemsk);
			nodes++;
		}
	}
	return nodes;
}
static int __irq_build_affinity_masks(unsigned int startvec,
				      unsigned int numvecs,
				      unsigned int firstvec,
				      cpumask_var_t *node_to_cpumask,
				      const struct cpumask *cpu_mask,
				      struct cpumask *nmsk,
				      struct irq_affinity_desc *masks)
{
	unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0;
	unsigned int last_affv = firstvec + numvecs;
	unsigned int curvec = startvec;
	nodemask_t nodemsk = NODE_MASK_NONE;

	if (!cpumask_weight(cpu_mask))
		return 0;

	nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);

	/*
	 * If the number of nodes in the mask is greater than or equal to the
	 * number of vectors, just spread the vectors across the nodes.
	 */
	if (numvecs <= nodes) {
		for_each_node_mask(n, nodemsk) {
			cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
				   node_to_cpumask[n]);
			if (++curvec == last_affv)
				curvec = firstvec;
		}
		return numvecs;
	}

	for_each_node_mask(n, nodemsk) {
		unsigned int ncpus, v, vecs_to_assign, vecs_per_node;

		/* Spread the vectors per node */
		vecs_per_node = (numvecs - (curvec - firstvec)) / nodes;

		/* Get the cpus on this node which are in the mask */
		cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);

		/* Calculate the number of cpus per vector */
		ncpus = cpumask_weight(nmsk);
		vecs_to_assign = min(vecs_per_node, ncpus);

		/* Account for rounding errors */
		extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);

		for (v = 0; curvec < last_affv && v < vecs_to_assign;
		     curvec++, v++) {
			cpus_per_vec = ncpus / vecs_to_assign;

			/* Account for extra vectors to compensate rounding errors */
			if (extra_vecs) {
				cpus_per_vec++;
				--extra_vecs;
			}
			irq_spread_init_one(&masks[curvec].mask, nmsk,
					    cpus_per_vec);
		}

		done += v;
		if (done >= numvecs)
			break;
		if (curvec >= last_affv)
			curvec = firstvec;
		--nodes;
	}
	return done;
}
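/*
 * Worked example of the per-node arithmetic above (hypothetical numbers,
 * not from the original source): numvecs = 6, firstvec = curvec = 0 and
 * two nodes with 8 CPUs each in the mask.  For the first node
 * vecs_per_node = 6 / 2 = 3, vecs_to_assign = min(3, 8) = 3 and
 * extra_vecs = 8 - 3 * (8 / 3) = 2, so its three vectors get 3, 3 and 2
 * CPUs respectively.  The second node then distributes its 8 CPUs over
 * the remaining (6 - 3) / 1 = 3 vectors the same way, and the function
 * returns done = 6.
 */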
/*
 * build affinity in two stages:
 *	1) spread present CPUs on these vectors
 *	2) spread other possible CPUs on these vectors
 */
static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
				    unsigned int firstvec,
				    struct irq_affinity_desc *masks)
{
	unsigned int curvec = startvec, nr_present, nr_others;
	cpumask_var_t *node_to_cpumask;
	cpumask_var_t nmsk, npresmsk;
	int ret = -ENOMEM;

	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
		return ret;

	if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
		goto fail_nmsk;

	node_to_cpumask = alloc_node_to_cpumask();
	if (!node_to_cpumask)
		goto fail_npresmsk;

	ret = 0;
	/* Stabilize the cpumasks */
	get_online_cpus();
	build_node_to_cpumask(node_to_cpumask);

	/* Spread on present CPUs starting from affd->pre_vectors */
	nr_present = __irq_build_affinity_masks(curvec, numvecs,
						firstvec, node_to_cpumask,
						cpu_present_mask, nmsk, masks);

	/*
	 * Spread on non present CPUs starting from the next vector to be
	 * handled. If the spreading of present CPUs already exhausted the
	 * vector space, assign the non present CPUs to the already spread
	 * out vectors.
	 */
	if (nr_present >= numvecs)
		curvec = firstvec;
	else
		curvec = firstvec + nr_present;
	cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
	nr_others = __irq_build_affinity_masks(curvec, numvecs,
					       firstvec, node_to_cpumask,
					       npresmsk, nmsk, masks);
	put_online_cpus();

	if (nr_present < numvecs)
		WARN_ON(nr_present + nr_others < numvecs);

	free_node_to_cpumask(node_to_cpumask);

 fail_npresmsk:
	free_cpumask_var(npresmsk);

 fail_nmsk:
	free_cpumask_var(nmsk);
	return ret;
}
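/*
 * Example of the two stage spread (hypothetical, for illustration): on a
 * box with 16 possible but only 8 present CPUs and numvecs = 16 vectors,
 * stage one spreads vectors 0-7 across the present CPUs and returns
 * nr_present = 8.  Stage two then spreads the remaining vectors 8-15
 * across the not-yet-present CPUs, so a CPU which is hotplugged later
 * already has managed vectors targeted at it.
 */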
static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
{
	affd->nr_sets = 1;
	affd->set_size[0] = affvecs;
}
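/*
 * Drivers that need more than one vector set provide their own
 * calc_sets() callback instead of the default above.  A minimal sketch
 * (hypothetical driver code; only the struct irq_affinity fields used
 * here are taken from this file):
 *
 *	static void foo_calc_sets(struct irq_affinity *affd,
 *				  unsigned int affvecs)
 *	{
 *		// e.g. split the spreadable vectors into read and write queues
 *		affd->nr_sets = 2;
 *		affd->set_size[0] = affvecs / 2;
 *		affd->set_size[1] = affvecs - affvecs / 2;
 *	}
 *
 * Each set is then spread independently by irq_create_affinity_masks().
 */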
/**
 * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
 * @nvecs:	The total number of vectors
 * @affd:	Description of the affinity requirements
 *
 * Returns the irq_affinity_desc pointer or NULL if allocation failed.
 */
struct irq_affinity_desc *
irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
{
	unsigned int affvecs, curvec, usedvecs, i;
	struct irq_affinity_desc *masks = NULL;

	/*
	 * Determine the number of vectors which need interrupt affinities
	 * assigned. If the pre/post request exhausts the available vectors
	 * then nothing to do here except for invoking the calc_sets()
	 * callback so the device driver can adjust to the situation. If there
	 * is only a single vector, then managing the queue is pointless as
	 * well.
	 */
	if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors)
		affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
	else
		affvecs = 0;

	/*
	 * Simple invocations do not provide a calc_sets() callback. Install
	 * the generic one.
	 */
	if (!affd->calc_sets)
		affd->calc_sets = default_calc_sets;

	/* Recalculate the sets */
	affd->calc_sets(affd, affvecs);

	if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
		return NULL;

	/* Nothing to assign? */
	if (!affvecs)
		return NULL;

	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return NULL;

	/* Fill out vectors at the beginning that don't need affinity */
	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
		cpumask_copy(&masks[curvec].mask, irq_default_affinity);

	/*
	 * Spread on present CPUs starting from affd->pre_vectors. If we
	 * have multiple sets, build each set's affinity mask separately.
	 */
	for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
		unsigned int this_vecs = affd->set_size[i];
		int ret;

		ret = irq_build_affinity_masks(curvec, this_vecs,
					       curvec, masks);
		if (ret) {
			kfree(masks);
			return NULL;
		}
		curvec += this_vecs;
		usedvecs += this_vecs;
	}

	/* Fill out vectors at the end that don't need affinity */
	if (usedvecs >= affvecs)
		curvec = affd->pre_vectors + affvecs;
	else
		curvec = affd->pre_vectors + usedvecs;
	for (; curvec < nvecs; curvec++)
		cpumask_copy(&masks[curvec].mask, irq_default_affinity);

	/* Mark the managed interrupts */
	for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
		masks[i].is_managed = 1;

	return masks;
}
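/*
 * Typical caller sketch (hypothetical, not part of this file): a PCI
 * driver normally does not call irq_create_affinity_masks() directly but
 * passes its struct irq_affinity to the PCI core, e.g.:
 *
 *	struct irq_affinity affd = { .pre_vectors = 1 };
 *
 *	nr = pci_alloc_irq_vectors_affinity(pdev, 2, nr_queues + 1,
 *			PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &affd);
 *
 * The PCI/MSI core then invokes irq_calc_affinity_vectors() and
 * irq_create_affinity_masks() on the driver's behalf.
 */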
/**
 * irq_calc_affinity_vectors - Calculate the optimal number of vectors
 * @minvec:	The minimum number of vectors available
 * @maxvec:	The maximum number of vectors available
 * @affd:	Description of the affinity requirements
 */
unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
				       const struct irq_affinity *affd)
{
	unsigned int resv = affd->pre_vectors + affd->post_vectors;
	unsigned int set_vecs;

	if (resv > minvec)
		return 0;

	if (affd->calc_sets) {
		set_vecs = maxvec - resv;
	} else {
		get_online_cpus();
		set_vecs = cpumask_weight(cpu_possible_mask);
		put_online_cpus();
	}

	return resv + min(set_vecs, maxvec - resv);
}
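/*
 * Worked example (hypothetical numbers): with pre_vectors = 1,
 * post_vectors = 0, minvec = 2, maxvec = 32, no calc_sets() callback and
 * 16 possible CPUs, resv = 1 and set_vecs = 16, so the function returns
 * 1 + min(16, 31) = 17 vectors: one pre vector plus one queue vector per
 * possible CPU.
 */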