kernel/padata.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * padata.c - generic interface to process data streams in parallel
   4  *
   5  * See Documentation/padata.txt for an api documentation.
   6  *
   7  * Copyright (C) 2008, 2009 secunet Security Networks AG
   8  * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
   9  *
  10  * This program is free software; you can redistribute it and/or modify it
  11  * under the terms and conditions of the GNU General Public License,
  12  * version 2, as published by the Free Software Foundation.
  13  *
  14  * This program is distributed in the hope it will be useful, but WITHOUT
  15  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  17  * more details.
  18  *
  19  * You should have received a copy of the GNU General Public License along with
  20  * this program; if not, write to the Free Software Foundation, Inc.,
  21  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  22  */
  23
  24 #include <linux/export.h>
  25 #include <linux/cpumask.h>
  26 #include <linux/err.h>
  27 #include <linux/cpu.h>
  28 #include <linux/padata.h>
  29 #include <linux/mutex.h>
  30 #include <linux/sched.h>
  31 #include <linux/slab.h>
  32 #include <linux/sysfs.h>
  33 #include <linux/rcupdate.h>
  34 #include <linux/module.h>
  35
/*
 * Upper bound checked against pd->refcnt in padata_do_parallel(): once that
 * counter reaches this value, new submissions are rejected with -EBUSY.
 */
#define MAX_OBJ_NUM 1000
  37
  38 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
  39 {
  40         int cpu, target_cpu;
  41
  42         target_cpu = cpumask_first(pd->cpumask.pcpu);
  43         for (cpu = 0; cpu < cpu_index; cpu++)
  44                 target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
  45
  46         return target_cpu;
  47 }
  48
  49 static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
  50 {
  51         /*
  52          * Hash the sequence numbers to the cpus by taking
  53          * seq_nr mod. number of cpus in use.
  54          */
  55         int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
  56
  57         return padata_index_to_cpu(pd, cpu_index);
  58 }
  59
  60 static void padata_parallel_worker(struct work_struct *parallel_work)
  61 {
  62         struct padata_parallel_queue *pqueue;
  63         LIST_HEAD(local_list);
  64
  65         local_bh_disable();
  66         pqueue = container_of(parallel_work,
  67                               struct padata_parallel_queue, work);
  68
  69         spin_lock(&pqueue->parallel.lock);
  70         list_replace_init(&pqueue->parallel.list, &local_list);
  71         spin_unlock(&pqueue->parallel.lock);
  72
  73         while (!list_empty(&local_list)) {
  74                 struct padata_priv *padata;
  75
  76                 padata = list_entry(local_list.next,
  77                                     struct padata_priv, list);
  78
  79                 list_del_init(&padata->list);
  80
  81                 padata->parallel(padata);
  82         }
  83
  84         local_bh_enable();
  85 }
  86
  87 /**
  88  * padata_do_parallel - padata parallelization function
  89  *
  90  * @pinst: padata instance
  91  * @padata: object to be parallelized
  92  * @cb_cpu: pointer to the CPU that the serialization callback function should
  93  *          run on.  If it's not in the serial cpumask of @pinst
  94  *          (i.e. cpumask.cbcpu), this function selects a fallback CPU and if
  95  *          none found, returns -EINVAL.
  96  *
  97  * The parallelization callback function will run with BHs off.
  98  * Note: Every object which is parallelized by padata_do_parallel
  99  * must be seen by padata_do_serial.
 100  */
 101 int padata_do_parallel(struct padata_instance *pinst,
 102                        struct padata_priv *padata, int *cb_cpu)
 103 {
 104         int i, cpu, cpu_index, target_cpu, err;
 105         struct padata_parallel_queue *queue;
 106         struct parallel_data *pd;
 107
 108         rcu_read_lock_bh();
 109
 110         pd = rcu_dereference_bh(pinst->pd);
 111
 112         err = -EINVAL;
 113         if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
 114                 goto out;
 115
 116         if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
 117                 if (!cpumask_weight(pd->cpumask.cbcpu))
 118                         goto out;
 119
 120                 /* Select an alternate fallback CPU and notify the caller. */
 121                 cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu);
 122
 123                 cpu = cpumask_first(pd->cpumask.cbcpu);
 124                 for (i = 0; i < cpu_index; i++)
 125                         cpu = cpumask_next(cpu, pd->cpumask.cbcpu);
 126
 127                 *cb_cpu = cpu;
 128         }
 129
 130         err =  -EBUSY;
 131         if ((pinst->flags & PADATA_RESET))
 132                 goto out;
 133
 134         if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
 135                 goto out;
 136
 137         err = 0;
 138         atomic_inc(&pd->refcnt);
 139         padata->pd = pd;
 140         padata->cb_cpu = *cb_cpu;
 141
 142         padata->seq_nr = atomic_inc_return(&pd->seq_nr);
 143         target_cpu = padata_cpu_hash(pd, padata->seq_nr);
 144         padata->cpu = target_cpu;
 145         queue = per_cpu_ptr(pd->pqueue, target_cpu);
 146
 147         spin_lock(&queue->parallel.lock);
 148         list_add_tail(&padata->list, &queue->parallel.list);
 149         spin_unlock(&queue->parallel.lock);
 150
 151         queue_work(pinst->parallel_wq, &queue->work);
 152
 153 out:
 154         rcu_read_unlock_bh();
 155
 156         return err;
 157 }
 158 EXPORT_SYMBOL(padata_do_parallel);
 159
 160 /*
 161  * padata_find_next - Find the next object that needs serialization.
 162  *
 163  * Return values are:
 164  *
 165  * A pointer to the control struct of the next object that needs
 166  * serialization, if present in one of the percpu reorder queues.
 167  *
 168  * NULL, if the next object that needs serialization will
 169  *  be parallel processed by another cpu and is not yet present in
 170  *  the cpu's reorder queue.
 171  */
 172 static struct padata_priv *padata_find_next(struct parallel_data *pd,
 173                                             bool remove_object)
 174 {
 175         struct padata_parallel_queue *next_queue;
 176         struct padata_priv *padata;
 177         struct padata_list *reorder;
 178         int cpu = pd->cpu;
 179
 180         next_queue = per_cpu_ptr(pd->pqueue, cpu);
 181         reorder = &next_queue->reorder;
 182
 183         spin_lock(&reorder->lock);
 184         if (list_empty(&reorder->list)) {
 185                 spin_unlock(&reorder->lock);
 186                 return NULL;
 187         }
 188
 189         padata = list_entry(reorder->list.next, struct padata_priv, list);
 190
 191         /*
 192          * Checks the rare case where two or more parallel jobs have hashed to
 193          * the same CPU and one of the later ones finishes first.
 194          */
 195         if (padata->seq_nr != pd->processed) {
 196                 spin_unlock(&reorder->lock);
 197                 return NULL;
 198         }
 199
 200         if (remove_object) {
 201                 list_del_init(&padata->list);
 202                 atomic_dec(&pd->reorder_objects);
 203                 ++pd->processed;
 204                 pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
 205         }
 206
 207         spin_unlock(&reorder->lock);
 208         return padata;
 209 }
 210
 211 static void padata_reorder(struct parallel_data *pd)
 212 {
 213         int cb_cpu;
 214         struct padata_priv *padata;
 215         struct padata_serial_queue *squeue;
 216         struct padata_instance *pinst = pd->pinst;
 217         struct padata_parallel_queue *next_queue;
 218
 219         /*
 220          * We need to ensure that only one cpu can work on dequeueing of
 221          * the reorder queue the time. Calculating in which percpu reorder
 222          * queue the next object will arrive takes some time. A spinlock
 223          * would be highly contended. Also it is not clear in which order
 224          * the objects arrive to the reorder queues. So a cpu could wait to
 225          * get the lock just to notice that there is nothing to do at the
 226          * moment. Therefore we use a trylock and let the holder of the lock
 227          * care for all the objects enqueued during the holdtime of the lock.
 228          */
 229         if (!spin_trylock_bh(&pd->lock))
 230                 return;
 231
 232         while (1) {
 233                 padata = padata_find_next(pd, true);
 234
 235                 /*
 236                  * If the next object that needs serialization is parallel
 237                  * processed by another cpu and is still on it's way to the
 238                  * cpu's reorder queue, nothing to do for now.
 239                  */
 240                 if (!padata)
 241                         break;
 242
 243                 cb_cpu = padata->cb_cpu;
 244                 squeue = per_cpu_ptr(pd->squeue, cb_cpu);
 245
 246                 spin_lock(&squeue->serial.lock);
 247                 list_add_tail(&padata->list, &squeue->serial.list);
 248                 spin_unlock(&squeue->serial.lock);
 249
 250                 queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work);
 251         }
 252
 253         spin_unlock_bh(&pd->lock);
 254
 255         /*
 256          * The next object that needs serialization might have arrived to
 257          * the reorder queues in the meantime.
 258          *
 259          * Ensure reorder queue is read after pd->lock is dropped so we see
 260          * new objects from another task in padata_do_serial.  Pairs with
 261          * smp_mb__after_atomic in padata_do_serial.
 262          */
 263         smp_mb();
 264
 265         next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
 266         if (!list_empty(&next_queue->reorder.list) &&
 267             padata_find_next(pd, false))
 268                 queue_work(pinst->serial_wq, &pd->reorder_work);
 269 }
 270
 271 static void invoke_padata_reorder(struct work_struct *work)
 272 {
 273         struct parallel_data *pd;
 274
 275         local_bh_disable();
 276         pd = container_of(work, struct parallel_data, reorder_work);
 277         padata_reorder(pd);
 278         local_bh_enable();
 279 }
 280
 281 static void padata_serial_worker(struct work_struct *serial_work)
 282 {
 283         struct padata_serial_queue *squeue;
 284         struct parallel_data *pd;
 285         LIST_HEAD(local_list);
 286
 287         local_bh_disable();
 288         squeue = container_of(serial_work, struct padata_serial_queue, work);
 289         pd = squeue->pd;
 290
 291         spin_lock(&squeue->serial.lock);
 292         list_replace_init(&squeue->serial.list, &local_list);
 293         spin_unlock(&squeue->serial.lock);
 294
 295         while (!list_empty(&local_list)) {
 296                 struct padata_priv *padata;
 297
 298                 padata = list_entry(local_list.next,
 299                                     struct padata_priv, list);
 300
 301                 list_del_init(&padata->list);
 302
 303                 padata->serial(padata);
 304                 atomic_dec(&pd->refcnt);
 305         }
 306         local_bh_enable();
 307 }
 308
 309 /**
 310  * padata_do_serial - padata serialization function
 311  *
 312  * @padata: object to be serialized.
 313  *
 314  * padata_do_serial must be called for every parallelized object.
 315  * The serialization callback function will run with BHs off.
 316  */
 317 void padata_do_serial(struct padata_priv *padata)
 318 {
 319         struct parallel_data *pd = padata->pd;
 320         struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
 321                                                            padata->cpu);
 322         struct padata_priv *cur;
 323
 324         spin_lock(&pqueue->reorder.lock);
 325         /* Sort in ascending order of sequence number. */
 326         list_for_each_entry_reverse(cur, &pqueue->reorder.list, list)
 327                 if (cur->seq_nr < padata->seq_nr)
 328                         break;
 329         list_add(&padata->list, &cur->list);
 330         atomic_inc(&pd->reorder_objects);
 331         spin_unlock(&pqueue->reorder.lock);
 332
 333         /*
 334          * Ensure the addition to the reorder list is ordered correctly
 335          * with the trylock of pd->lock in padata_reorder.  Pairs with smp_mb
 336          * in padata_reorder.
 337          */
 338         smp_mb__after_atomic();
 339
 340         padata_reorder(pd);
 341 }
 342 EXPORT_SYMBOL(padata_do_serial);
 343
 344 static int padata_setup_cpumasks(struct parallel_data *pd,
 345                                  const struct cpumask *pcpumask,
 346                                  const struct cpumask *cbcpumask)
 347 {
 348         struct workqueue_attrs *attrs;
 349         int err = -ENOMEM;
 350
 351         if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
 352                 goto out;
 353         cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
 354
 355         if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
 356                 goto free_pcpu_mask;
 357         cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
 358
 359         attrs = alloc_workqueue_attrs();
 360         if (!attrs)
 361                 goto free_cbcpu_mask;
 362
 363         /* Restrict parallel_wq workers to pd->cpumask.pcpu. */
 364         cpumask_copy(attrs->cpumask, pd->cpumask.pcpu);
 365         err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs);
 366         free_workqueue_attrs(attrs);
 367         if (err < 0)
 368                 goto free_cbcpu_mask;
 369
 370         return 0;
 371
 372 free_cbcpu_mask:
 373         free_cpumask_var(pd->cpumask.cbcpu);
 374 free_pcpu_mask:
 375         free_cpumask_var(pd->cpumask.pcpu);
 376 out:
 377         return err;
 378 }
 379
 380 static void __padata_list_init(struct padata_list *pd_list)
 381 {
 382         INIT_LIST_HEAD(&pd_list->list);
 383         spin_lock_init(&pd_list->lock);
 384 }
 385
 386 /* Initialize all percpu queues used by serial workers */
 387 static void padata_init_squeues(struct parallel_data *pd)
 388 {
 389         int cpu;
 390         struct padata_serial_queue *squeue;
 391
 392         for_each_cpu(cpu, pd->cpumask.cbcpu) {
 393                 squeue = per_cpu_ptr(pd->squeue, cpu);
 394                 squeue->pd = pd;
 395                 __padata_list_init(&squeue->serial);
 396                 INIT_WORK(&squeue->work, padata_serial_worker);
 397         }
 398 }
 399
 400 /* Initialize all percpu queues used by parallel workers */
 401 static void padata_init_pqueues(struct parallel_data *pd)
 402 {
 403         int cpu_index, cpu;
 404         struct padata_parallel_queue *pqueue;
 405
 406         cpu_index = 0;
 407         for_each_possible_cpu(cpu) {
 408                 pqueue = per_cpu_ptr(pd->pqueue, cpu);
 409
 410                 if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
 411                         pqueue->cpu_index = -1;
 412                         continue;
 413                 }
 414
 415                 pqueue->cpu_index = cpu_index;
 416                 cpu_index++;
 417
 418                 __padata_list_init(&pqueue->reorder);
 419                 __padata_list_init(&pqueue->parallel);
 420                 INIT_WORK(&pqueue->work, padata_parallel_worker);
 421                 atomic_set(&pqueue->num_obj, 0);
 422         }
 423 }
 424
 425 /* Allocate and initialize the internal cpumask dependend resources. */
 426 static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 427                                              const struct cpumask *pcpumask,
 428                                              const struct cpumask *cbcpumask)
 429 {
 430         struct parallel_data *pd;
 431
 432         pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
 433         if (!pd)
 434                 goto err;
 435
 436         pd->pqueue = alloc_percpu(struct padata_parallel_queue);
 437         if (!pd->pqueue)
 438                 goto err_free_pd;
 439
 440         pd->squeue = alloc_percpu(struct padata_serial_queue);
 441         if (!pd->squeue)
 442                 goto err_free_pqueue;
 443
 444         pd->pinst = pinst;
 445         if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
 446                 goto err_free_squeue;
 447
 448         padata_init_pqueues(pd);
 449         padata_init_squeues(pd);
 450         atomic_set(&pd->seq_nr, -1);
 451         atomic_set(&pd->reorder_objects, 0);
 452         atomic_set(&pd->refcnt, 0);
 453         spin_lock_init(&pd->lock);
 454         pd->cpu = cpumask_first(pd->cpumask.pcpu);
 455         INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
 456
 457         return pd;
 458
 459 err_free_squeue:
 460         free_percpu(pd->squeue);
 461 err_free_pqueue:
 462         free_percpu(pd->pqueue);
 463 err_free_pd:
 464         kfree(pd);
 465 err:
 466         return NULL;
 467 }
 468
 469 static void padata_free_pd(struct parallel_data *pd)
 470 {
 471         free_cpumask_var(pd->cpumask.pcpu);
 472         free_cpumask_var(pd->cpumask.cbcpu);
 473         free_percpu(pd->pqueue);
 474         free_percpu(pd->squeue);
 475         kfree(pd);
 476 }
 477
 478 /* Flush all objects out of the padata queues. */
 479 static void padata_flush_queues(struct parallel_data *pd)
 480 {
 481         int cpu;
 482         struct padata_parallel_queue *pqueue;
 483         struct padata_serial_queue *squeue;
 484
 485         for_each_cpu(cpu, pd->cpumask.pcpu) {
 486                 pqueue = per_cpu_ptr(pd->pqueue, cpu);
 487                 flush_work(&pqueue->work);
 488         }
 489
 490         if (atomic_read(&pd->reorder_objects))
 491                 padata_reorder(pd);
 492
 493         for_each_cpu(cpu, pd->cpumask.cbcpu) {
 494                 squeue = per_cpu_ptr(pd->squeue, cpu);
 495                 flush_work(&squeue->work);
 496         }
 497
 498         BUG_ON(atomic_read(&pd->refcnt) != 0);
 499 }
 500
 501 static void __padata_start(struct padata_instance *pinst)
 502 {
 503         pinst->flags |= PADATA_INIT;
 504 }
 505
 506 static void __padata_stop(struct padata_instance *pinst)
 507 {
 508         if (!(pinst->flags & PADATA_INIT))
 509                 return;
 510
 511         pinst->flags &= ~PADATA_INIT;
 512
 513         synchronize_rcu();
 514
 515         get_online_cpus();
 516         padata_flush_queues(pinst->pd);
 517         put_online_cpus();
 518 }
 519
 520 /* Replace the internal control structure with a new one. */
 521 static void padata_replace(struct padata_instance *pinst,
 522                            struct parallel_data *pd_new)
 523 {
 524         struct parallel_data *pd_old = pinst->pd;
 525         int notification_mask = 0;
 526
 527         pinst->flags |= PADATA_RESET;
 528
 529         rcu_assign_pointer(pinst->pd, pd_new);
 530
 531         synchronize_rcu();
 532
 533         if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
 534                 notification_mask |= PADATA_CPU_PARALLEL;
 535         if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
 536                 notification_mask |= PADATA_CPU_SERIAL;
 537
 538         padata_flush_queues(pd_old);
 539         padata_free_pd(pd_old);
 540
 541         if (notification_mask)
 542                 blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
 543                                              notification_mask,
 544                                              &pd_new->cpumask);
 545
 546         pinst->flags &= ~PADATA_RESET;
 547 }
 548
 549 /**
 550  * padata_register_cpumask_notifier - Registers a notifier that will be called
 551  *                             if either pcpu or cbcpu or both cpumasks change.
 552  *
 553  * @pinst: A poineter to padata instance
 554  * @nblock: A pointer to notifier block.
 555  */
 556 int padata_register_cpumask_notifier(struct padata_instance *pinst,
 557                                      struct notifier_block *nblock)
 558 {
 559         return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
 560                                                 nblock);
 561 }
 562 EXPORT_SYMBOL(padata_register_cpumask_notifier);
 563
 564 /**
 565  * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
 566  *        registered earlier  using padata_register_cpumask_notifier
 567  *
 568  * @pinst: A pointer to data instance.
 569  * @nlock: A pointer to notifier block.
 570  */
 571 int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
 572                                        struct notifier_block *nblock)
 573 {
 574         return blocking_notifier_chain_unregister(
 575                 &pinst->cpumask_change_notifier,
 576                 nblock);
 577 }
 578 EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
 579
 580
 581 /* If cpumask contains no active cpu, we mark the instance as invalid. */
 582 static bool padata_validate_cpumask(struct padata_instance *pinst,
 583                                     const struct cpumask *cpumask)
 584 {
 585         if (!cpumask_intersects(cpumask, cpu_online_mask)) {
 586                 pinst->flags |= PADATA_INVALID;
 587                 return false;
 588         }
 589
 590         pinst->flags &= ~PADATA_INVALID;
 591         return true;
 592 }
 593
 594 static int __padata_set_cpumasks(struct padata_instance *pinst,
 595                                  cpumask_var_t pcpumask,
 596                                  cpumask_var_t cbcpumask)
 597 {
 598         int valid;
 599         struct parallel_data *pd;
 600
 601         valid = padata_validate_cpumask(pinst, pcpumask);
 602         if (!valid) {
 603                 __padata_stop(pinst);
 604                 goto out_replace;
 605         }
 606
 607         valid = padata_validate_cpumask(pinst, cbcpumask);
 608         if (!valid)
 609                 __padata_stop(pinst);
 610
 611 out_replace:
 612         pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
 613         if (!pd)
 614                 return -ENOMEM;
 615
 616         cpumask_copy(pinst->cpumask.pcpu, pcpumask);
 617         cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 618
 619         padata_replace(pinst, pd);
 620
 621         if (valid)
 622                 __padata_start(pinst);
 623
 624         return 0;
 625 }
 626
 627 /**
 628  * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
 629  *                     equivalent to @cpumask.
 630  *
 631  * @pinst: padata instance
 632  * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
 633  *                to parallel and serial cpumasks respectively.
 634  * @cpumask: the cpumask to use
 635  */
 636 int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 637                        cpumask_var_t cpumask)
 638 {
 639         struct cpumask *serial_mask, *parallel_mask;
 640         int err = -EINVAL;
 641
 642         mutex_lock(&pinst->lock);
 643         get_online_cpus();
 644
 645         switch (cpumask_type) {
 646         case PADATA_CPU_PARALLEL:
 647                 serial_mask = pinst->cpumask.cbcpu;
 648                 parallel_mask = cpumask;
 649                 break;
 650         case PADATA_CPU_SERIAL:
 651                 parallel_mask = pinst->cpumask.pcpu;
 652                 serial_mask = cpumask;
 653                 break;
 654         default:
 655                  goto out;
 656         }
 657
 658         err =  __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
 659
 660 out:
 661         put_online_cpus();
 662         mutex_unlock(&pinst->lock);
 663
 664         return err;
 665 }
 666 EXPORT_SYMBOL(padata_set_cpumask);
 667
 668 /**
 669  * padata_start - start the parallel processing
 670  *
 671  * @pinst: padata instance to start
 672  */
 673 int padata_start(struct padata_instance *pinst)
 674 {
 675         int err = 0;
 676
 677         mutex_lock(&pinst->lock);
 678
 679         if (pinst->flags & PADATA_INVALID)
 680                 err = -EINVAL;
 681
 682         __padata_start(pinst);
 683
 684         mutex_unlock(&pinst->lock);
 685
 686         return err;
 687 }
 688 EXPORT_SYMBOL(padata_start);
 689
 690 /**
 691  * padata_stop - stop the parallel processing
 692  *
 693  * @pinst: padata instance to stop
 694  */
 695 void padata_stop(struct padata_instance *pinst)
 696 {
 697         mutex_lock(&pinst->lock);
 698         __padata_stop(pinst);
 699         mutex_unlock(&pinst->lock);
 700 }
 701 EXPORT_SYMBOL(padata_stop);
 702
 703 #ifdef CONFIG_HOTPLUG_CPU
 704
 705 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
 706 {
 707         struct parallel_data *pd;
 708
 709         if (cpumask_test_cpu(cpu, cpu_online_mask)) {
 710                 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
 711                                      pinst->cpumask.cbcpu);
 712                 if (!pd)
 713                         return -ENOMEM;
 714
 715                 padata_replace(pinst, pd);
 716
 717                 if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
 718                     padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
 719                         __padata_start(pinst);
 720         }
 721
 722         return 0;
 723 }
 724
 725 static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
 726 {
 727         struct parallel_data *pd = NULL;
 728
 729         if (cpumask_test_cpu(cpu, cpu_online_mask)) {
 730
 731                 if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
 732                     !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
 733                         __padata_stop(pinst);
 734
 735                 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
 736                                      pinst->cpumask.cbcpu);
 737                 if (!pd)
 738                         return -ENOMEM;
 739
 740                 padata_replace(pinst, pd);
 741
 742                 cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
 743                 cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
 744         }
 745
 746         return 0;
 747 }
 748
 749  /**
 750  * padata_remove_cpu - remove a cpu from the one or both(serial and parallel)
 751  *                     padata cpumasks.
 752  *
 753  * @pinst: padata instance
 754  * @cpu: cpu to remove
 755  * @mask: bitmask specifying from which cpumask @cpu should be removed
 756  *        The @mask may be any combination of the following flags:
 757  *          PADATA_CPU_SERIAL   - serial cpumask
 758  *          PADATA_CPU_PARALLEL - parallel cpumask
 759  */
 760 int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
 761 {
 762         int err;
 763
 764         if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
 765                 return -EINVAL;
 766
 767         mutex_lock(&pinst->lock);
 768
 769         get_online_cpus();
 770         if (mask & PADATA_CPU_SERIAL)
 771                 cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
 772         if (mask & PADATA_CPU_PARALLEL)
 773                 cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
 774
 775         err = __padata_remove_cpu(pinst, cpu);
 776         put_online_cpus();
 777
 778         mutex_unlock(&pinst->lock);
 779
 780         return err;
 781 }
 782 EXPORT_SYMBOL(padata_remove_cpu);
 783
 784 static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
 785 {
 786         return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
 787                 cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
 788 }
 789
 790 static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
 791 {
 792         struct padata_instance *pinst;
 793         int ret;
 794
 795         pinst = hlist_entry_safe(node, struct padata_instance, node);
 796         if (!pinst_has_cpu(pinst, cpu))
 797                 return 0;
 798
 799         mutex_lock(&pinst->lock);
 800         ret = __padata_add_cpu(pinst, cpu);
 801         mutex_unlock(&pinst->lock);
 802         return ret;
 803 }
 804
 805 static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node)
 806 {
 807         struct padata_instance *pinst;
 808         int ret;
 809
 810         pinst = hlist_entry_safe(node, struct padata_instance, node);
 811         if (!pinst_has_cpu(pinst, cpu))
 812                 return 0;
 813
 814         mutex_lock(&pinst->lock);
 815         ret = __padata_remove_cpu(pinst, cpu);
 816         mutex_unlock(&pinst->lock);
 817         return ret;
 818 }
 819
/*
 * CPU hotplug state handle; __padata_free() uses it to remove an instance's
 * node from the hotplug callbacks.  NOTE(review): where it is assigned is
 * not visible in this part of the file.
 */
static enum cpuhp_state hp_online;
 821 #endif
 822
 823 static void __padata_free(struct padata_instance *pinst)
 824 {
 825 #ifdef CONFIG_HOTPLUG_CPU
 826         cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
 827 #endif
 828
 829         padata_stop(pinst);
 830         padata_free_pd(pinst->pd);
 831         free_cpumask_var(pinst->cpumask.pcpu);
 832         free_cpumask_var(pinst->cpumask.cbcpu);
 833         destroy_workqueue(pinst->serial_wq);
 834         destroy_workqueue(pinst->parallel_wq);
 835         kfree(pinst);
 836 }
 837
/* Get the padata_instance that embeds the given kobject as its 'kobj' member. */
#define kobj2pinst(_kobj)                                       \
        container_of(_kobj, struct padata_instance, kobj)
/* Get the padata_sysfs_entry that embeds the given attribute as 'attr'. */
#define attr2pentry(_attr)                                      \
        container_of(_attr, struct padata_sysfs_entry, attr)
 842
 843 static void padata_sysfs_release(struct kobject *kobj)
 844 {
 845         struct padata_instance *pinst = kobj2pinst(kobj);
 846         __padata_free(pinst);
 847 }
 848
/*
 * A padata sysfs attribute together with its handlers.  padata_sysfs_show()
 * and padata_sysfs_store() recover this structure from the attribute via
 * attr2pentry() and dispatch to the callbacks below.
 */
struct padata_sysfs_entry {
        struct attribute attr;
        /* Formats the attribute value into the buffer; returns length or -errno. */
        ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
        /* Parses a new value from the buffer; may be NULL (see PADATA_ATTR_RO). */
        ssize_t (*store)(struct padata_instance *, struct attribute *,
                         const char *, size_t);
};
 855
 856 static ssize_t show_cpumask(struct padata_instance *pinst,
 857                             struct attribute *attr,  char *buf)
 858 {
 859         struct cpumask *cpumask;
 860         ssize_t len;
 861
 862         mutex_lock(&pinst->lock);
 863         if (!strcmp(attr->name, "serial_cpumask"))
 864                 cpumask = pinst->cpumask.cbcpu;
 865         else
 866                 cpumask = pinst->cpumask.pcpu;
 867
 868         len = snprintf(buf, PAGE_SIZE, "%*pb\n",
 869                        nr_cpu_ids, cpumask_bits(cpumask));
 870         mutex_unlock(&pinst->lock);
 871         return len < PAGE_SIZE ? len : -EINVAL;
 872 }
 873
 874 static ssize_t store_cpumask(struct padata_instance *pinst,
 875                              struct attribute *attr,
 876                              const char *buf, size_t count)
 877 {
 878         cpumask_var_t new_cpumask;
 879         ssize_t ret;
 880         int mask_type;
 881
 882         if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
 883                 return -ENOMEM;
 884
 885         ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
 886                            nr_cpumask_bits);
 887         if (ret < 0)
 888                 goto out;
 889
 890         mask_type = !strcmp(attr->name, "serial_cpumask") ?
 891                 PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
 892         ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
 893         if (!ret)
 894                 ret = count;
 895
 896 out:
 897         free_cpumask_var(new_cpumask);
 898         return ret;
 899 }
 900
/*
 * Helpers that define a static struct padata_sysfs_entry named <_name>_attr:
 *   PADATA_ATTR_RW - mode 0644 with both a show and a store handler.
 *   PADATA_ATTR_RO - mode 0400 with a show handler only (store is NULL).
 */
#define PADATA_ATTR_RW(_name, _show_name, _store_name)          \
        static struct padata_sysfs_entry _name##_attr =         \
                __ATTR(_name, 0644, _show_name, _store_name)
#define PADATA_ATTR_RO(_name, _show_name)               \
        static struct padata_sysfs_entry _name##_attr = \
                __ATTR(_name, 0400, _show_name, NULL)

/* Both cpumask attributes share the same show/store handlers. */
PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
 910
 911 /*
 912  * Padata sysfs provides the following objects:
 913  * serial_cpumask   [RW] - cpumask for serial workers
 914  * parallel_cpumask [RW] - cpumask for parallel workers
 915  */
 916 static struct attribute *padata_default_attrs[] = {
 917         &serial_cpumask_attr.attr,
 918         &parallel_cpumask_attr.attr,
 919         NULL,
 920 };
 921 ATTRIBUTE_GROUPS(padata_default);
 922
 923 static ssize_t padata_sysfs_show(struct kobject *kobj,
 924                                  struct attribute *attr, char *buf)
 925 {
 926         struct padata_instance *pinst;
 927         struct padata_sysfs_entry *pentry;
 928         ssize_t ret = -EIO;
 929
 930         pinst = kobj2pinst(kobj);
 931         pentry = attr2pentry(attr);
 932         if (pentry->show)
 933                 ret = pentry->show(pinst, attr, buf);
 934
 935         return ret;
 936 }
 937
 938 static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
 939                                   const char *buf, size_t count)
 940 {
 941         struct padata_instance *pinst;
 942         struct padata_sysfs_entry *pentry;
 943         ssize_t ret = -EIO;
 944
 945         pinst = kobj2pinst(kobj);
 946         pentry = attr2pentry(attr);
 947         if (pentry->show)
 948                 ret = pentry->store(pinst, attr, buf, count);
 949
 950         return ret;
 951 }
 952
 953 static const struct sysfs_ops padata_sysfs_ops = {
 954         .show = padata_sysfs_show,
 955         .store = padata_sysfs_store,
 956 };
 957
 958 static struct kobj_type padata_attr_type = {
 959         .sysfs_ops = &padata_sysfs_ops,
 960         .default_groups = padata_default_groups,
 961         .release = padata_sysfs_release,
 962 };
 963
 964 /**
 965  * padata_alloc - allocate and initialize a padata instance and specify
 966  *                cpumasks for serial and parallel workers.
 967  *
 968  * @name: used to identify the instance
 969  * @pcpumask: cpumask that will be used for padata parallelization
 970  * @cbcpumask: cpumask that will be used for padata serialization
 971  */
 972 static struct padata_instance *padata_alloc(const char *name,
 973                                             const struct cpumask *pcpumask,
 974                                             const struct cpumask *cbcpumask)
 975 {
 976         struct padata_instance *pinst;
 977         struct parallel_data *pd = NULL;
 978
 979         pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
 980         if (!pinst)
 981                 goto err;
 982
 983         pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
 984                                              name);
 985         if (!pinst->parallel_wq)
 986                 goto err_free_inst;
 987
 988         get_online_cpus();
 989
 990         pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
 991                                            WQ_CPU_INTENSIVE, 1, name);
 992         if (!pinst->serial_wq)
 993                 goto err_put_cpus;
 994
 995         if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
 996                 goto err_free_serial_wq;
 997         if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
 998                 free_cpumask_var(pinst->cpumask.pcpu);
 999                 goto err_free_serial_wq;
1000         }
1001         if (!padata_validate_cpumask(pinst, pcpumask) ||
1002             !padata_validate_cpumask(pinst, cbcpumask))
1003                 goto err_free_masks;
1004
1005         pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
1006         if (!pd)
1007                 goto err_free_masks;
1008
1009         rcu_assign_pointer(pinst->pd, pd);
1010
1011         cpumask_copy(pinst->cpumask.pcpu, pcpumask);
1012         cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
1013
1014         pinst->flags = 0;
1015
1016         BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
1017         kobject_init(&pinst->kobj, &padata_attr_type);
1018         mutex_init(&pinst->lock);
1019
1020 #ifdef CONFIG_HOTPLUG_CPU
1021         cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
1022 #endif
1023
1024         put_online_cpus();
1025
1026         return pinst;
1027
1028 err_free_masks:
1029         free_cpumask_var(pinst->cpumask.pcpu);
1030         free_cpumask_var(pinst->cpumask.cbcpu);
1031 err_free_serial_wq:
1032         destroy_workqueue(pinst->serial_wq);
1033 err_put_cpus:
1034         put_online_cpus();
1035         destroy_workqueue(pinst->parallel_wq);
1036 err_free_inst:
1037         kfree(pinst);
1038 err:
1039         return NULL;
1040 }
1041
1042 /**
1043  * padata_alloc_possible - Allocate and initialize padata instance.
1044  *                         Use the cpu_possible_mask for serial and
1045  *                         parallel workers.
1046  *
1047  * @name: used to identify the instance
1048  */
1049 struct padata_instance *padata_alloc_possible(const char *name)
1050 {
1051         return padata_alloc(name, cpu_possible_mask, cpu_possible_mask);
1052 }
1053 EXPORT_SYMBOL(padata_alloc_possible);
1054
1055 /**
1056  * padata_free - free a padata instance
1057  *
1058  * @padata_inst: padata instance to free
1059  */
1060 void padata_free(struct padata_instance *pinst)
1061 {
1062         kobject_put(&pinst->kobj);
1063 }
1064 EXPORT_SYMBOL(padata_free);
1065
1066 #ifdef CONFIG_HOTPLUG_CPU
1067
1068 static __init int padata_driver_init(void)
1069 {
1070         int ret;
1071
1072         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
1073                                       padata_cpu_online,
1074                                       padata_cpu_prep_down);
1075         if (ret < 0)
1076                 return ret;
1077         hp_online = ret;
1078         return 0;
1079 }
1080 module_init(padata_driver_init);
1081
1082 static __exit void padata_driver_exit(void)
1083 {
1084         cpuhp_remove_multi_state(hp_online);
1085 }
1086 module_exit(padata_driver_exit);
1087 #endif