kernel/bpf/local_storage.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

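/*
 * Per-cpu pointers to the active storage buffers, one slot per storage
 * type (shared and per-cpu).  The slots are populated right before a
 * cgroup BPF program runs (via bpf_cgroup_storage_set() in
 * linux/bpf-cgroup.h), so the bpf_get_local_storage() helper can
 * resolve them cheaply at run time.
 */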
DEFINE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK                                  \
        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

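/*
 * A cgroup storage map.  @lock protects @root, @list and @prog.
 * @root is an rbtree of storage items keyed by (cgroup_inode_id,
 * attach_type), @list additionally links every item for key iteration,
 * and @prog is the one program this map has been bound to.
 */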
struct bpf_cgroup_storage_map {
        struct bpf_map map;

        spinlock_t lock;
        struct bpf_prog *prog;
        struct rb_root root;
        struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
        return container_of(map, struct bpf_cgroup_storage_map, map);
}

/* Total order on storage keys: by cgroup inode id, then attach type. */
static int bpf_cgroup_storage_key_cmp(
        const struct bpf_cgroup_storage_key *key1,
        const struct bpf_cgroup_storage_key *key2)
{
        if (key1->cgroup_inode_id < key2->cgroup_inode_id)
                return -1;
        else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
                return 1;
        else if (key1->attach_type < key2->attach_type)
                return -1;
        else if (key1->attach_type > key2->attach_type)
                return 1;
        return 0;
}

/*
 * Find the storage for @key in the map's rbtree.  @locked tells whether
 * the caller already holds map->lock.
 */
static struct bpf_cgroup_storage *cgroup_storage_lookup(
        struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
        bool locked)
{
        struct rb_root *root = &map->root;
        struct rb_node *node;

        if (!locked)
                spin_lock_bh(&map->lock);

        node = root->rb_node;
        while (node) {
                struct bpf_cgroup_storage *storage;

                storage = container_of(node, struct bpf_cgroup_storage, node);

                switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
                case -1:
                        node = node->rb_left;
                        break;
                case 1:
                        node = node->rb_right;
                        break;
                default:
                        if (!locked)
                                spin_unlock_bh(&map->lock);
                        return storage;
                }
        }

        if (!locked)
                spin_unlock_bh(&map->lock);

        return NULL;
}

/*
 * Insert @storage into the rbtree; returns -EEXIST if a storage with
 * the same key is already present.  Caller must hold map->lock.
 */
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
                                 struct bpf_cgroup_storage *storage)
{
        struct rb_root *root = &map->root;
        struct rb_node **new = &(root->rb_node), *parent = NULL;

        while (*new) {
                struct bpf_cgroup_storage *this;

                this = container_of(*new, struct bpf_cgroup_storage, node);

                parent = *new;
                switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
                case -1:
                        new = &((*new)->rb_left);
                        break;
                case 1:
                        new = &((*new)->rb_right);
                        break;
                default:
                        return -EEXIST;
                }
        }

        rb_link_node(&storage->node, parent, new);
        rb_insert_color(&storage->node, root);

        return 0;
}

/*
 * Syscall-path lookup: returns a pointer to the current data buffer.
 * READ_ONCE() pairs with the xchg() in cgroup_storage_update_elem().
 */
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        struct bpf_cgroup_storage_key *key = _key;
        struct bpf_cgroup_storage *storage;

        storage = cgroup_storage_lookup(map, key, false);
        if (!storage)
                return NULL;

        return &READ_ONCE(storage->buf)->data[0];
}

/*
 * Syscall-path update.  Storage items are created at attach time, never
 * by this function, so BPF_NOEXIST can never succeed and is rejected.
 * The value is replaced wholesale: a fresh buffer is swapped in with
 * xchg() and the old one is freed after an RCU grace period.
 */
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
                                      void *value, u64 flags)
{
        struct bpf_cgroup_storage_key *key = _key;
        struct bpf_cgroup_storage *storage;
        struct bpf_storage_buffer *new;

        if (flags & BPF_NOEXIST)
                return -EINVAL;

        storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
                                        key, false);
        if (!storage)
                return -ENOENT;

        new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
                           map->value_size, __GFP_ZERO | GFP_USER,
                           map->numa_node);
        if (!new)
                return -ENOMEM;

        memcpy(&new->data[0], value, map->value_size);

        new = xchg(&storage->buf, new);
        kfree_rcu(new, rcu);

        return 0;
}

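/*
 * Illustrative userspace usage (not part of this file): a minimal
 * sketch using libbpf's syscall wrappers, assuming "fd" is the fd of a
 * BPF_MAP_TYPE_CGROUP_STORAGE map with a u64 value and "cg_id" is a
 * cgroup inode id obtained elsewhere:
 *
 *	#include <bpf/bpf.h>
 *	#include <linux/bpf.h>
 *
 *	struct bpf_cgroup_storage_key key = {
 *		.cgroup_inode_id = cg_id,
 *		.attach_type = BPF_CGROUP_INET_EGRESS,
 *	};
 *	__u64 value;
 *
 *	bpf_map_lookup_elem(fd, &key, &value);
 *	value = 0;
 *	// BPF_NOEXIST would fail with -EINVAL, per the check above
 *	bpf_map_update_elem(fd, &key, &value, BPF_ANY);
 */
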
/*
 * Syscall-path iteration: with a NULL key return the first storage's
 * key, otherwise the key of the storage following @key in the map's
 * list.
 */
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
                                       void *_next_key)
{
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        struct bpf_cgroup_storage_key *key = _key;
        struct bpf_cgroup_storage_key *next = _next_key;
        struct bpf_cgroup_storage *storage;

        spin_lock_bh(&map->lock);

        if (list_empty(&map->list))
                goto enoent;

        if (key) {
                storage = cgroup_storage_lookup(map, key, true);
                if (!storage)
                        goto enoent;

                /* list_next_entry() never returns NULL; detect the end
                 * of the list by comparing against the list head.
                 */
                storage = list_next_entry(storage, list);
                if (&storage->list == &map->list)
                        goto enoent;
        } else {
                storage = list_first_entry(&map->list,
                                           struct bpf_cgroup_storage, list);
        }

        spin_unlock_bh(&map->lock);
        next->attach_type = storage->key.attach_type;
        next->cgroup_inode_id = storage->key.cgroup_inode_id;
        return 0;

enoent:
        spin_unlock_bh(&map->lock);
        return -ENOENT;
}

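/*
 * Illustrative iteration from userspace (not part of this file), again
 * assuming "fd" is a cgroup storage map fd:
 *
 *	struct bpf_cgroup_storage_key key, next;
 *	int err;
 *
 *	err = bpf_map_get_next_key(fd, NULL, &next);	// first key
 *	while (!err) {
 *		key = next;
 *		// ... look up / process "key" ...
 *		err = bpf_map_get_next_key(fd, &key, &next);
 *	}
 */
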
/* Allocate the map itself; storage items are created later, at attach time. */
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
        int numa_node = bpf_map_attr_numa_node(attr);
        struct bpf_cgroup_storage_map *map;

        if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
                return ERR_PTR(-EINVAL);

        if (attr->value_size > PAGE_SIZE)
                return ERR_PTR(-E2BIG);

        if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
                /* reserved bits should not be used */
                return ERR_PTR(-EINVAL);

        if (attr->max_entries)
                /* max_entries is not used and enforced to be 0 */
                return ERR_PTR(-EINVAL);

        map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
                           __GFP_ZERO | GFP_USER, numa_node);
        if (!map)
                return ERR_PTR(-ENOMEM);

        map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
                                  PAGE_SIZE) >> PAGE_SHIFT;

        /* copy mandatory map attributes */
        bpf_map_init_from_attr(&map->map, attr);

        spin_lock_init(&map->lock);
        map->root = RB_ROOT;
        INIT_LIST_HEAD(&map->list);

        return &map->map;
}

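/*
 * Illustrative map creation from userspace (not part of this file).
 * Per the checks above, key_size must be
 * sizeof(struct bpf_cgroup_storage_key) and max_entries must be 0:
 *
 *	// max_entries = 0 and map_flags = 0
 *	int fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
 *				sizeof(struct bpf_cgroup_storage_key),
 *				sizeof(__u64), 0, 0);
 */
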
static void cgroup_storage_map_free(struct bpf_map *_map)
{
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);

        /* every storage must have been unlinked before the map goes away */
        WARN_ON(!RB_EMPTY_ROOT(&map->root));
        WARN_ON(!list_empty(&map->list));

        kfree(map);
}

/* Deletion is not supported: storage items live and die with the attachment. */
static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
        return -EINVAL;
}

const struct bpf_map_ops cgroup_storage_map_ops = {
        .map_alloc = cgroup_storage_map_alloc,
        .map_free = cgroup_storage_map_free,
        .map_get_next_key = cgroup_storage_get_next_key,
        .map_lookup_elem = cgroup_storage_lookup_elem,
        .map_update_elem = cgroup_storage_update_elem,
        .map_delete_elem = cgroup_storage_delete_elem,
        .map_check_btf = map_check_no_btf,
};

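/*
 * Illustrative BPF program side (not part of this file): a sketch in
 * the style of this era's selftests; "counter" and "count_packets" are
 * made-up names:
 *
 *	struct bpf_map_def SEC("maps") counter = {
 *		.type = BPF_MAP_TYPE_CGROUP_STORAGE,
 *		.key_size = sizeof(struct bpf_cgroup_storage_key),
 *		.value_size = sizeof(__u64),
 *	};
 *
 *	SEC("cgroup/skb")
 *	int count_packets(struct __sk_buff *skb)
 *	{
 *		__u64 *val = bpf_get_local_storage(&counter, 0);
 *
 *		__sync_fetch_and_add(val, 1);
 *		return 1;
 *	}
 */
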
/*
 * Bind a map to a program; called by the verifier at load time.  A
 * cgroup storage map may be used by at most one program, and a program
 * may use at most one map per storage type.
 */
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        int ret = -EBUSY;

        spin_lock_bh(&map->lock);

        if (map->prog && map->prog != prog)
                goto unlock;
        if (prog->aux->cgroup_storage[stype] &&
            prog->aux->cgroup_storage[stype] != _map)
                goto unlock;

        map->prog = prog;
        prog->aux->cgroup_storage[stype] = _map;
        ret = 0;
unlock:
        spin_unlock_bh(&map->lock);

        return ret;
}

/* Undo bpf_cgroup_storage_assign() when the program is freed. */
void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);

        spin_lock_bh(&map->lock);
        if (map->prog == prog) {
                WARN_ON(prog->aux->cgroup_storage[stype] != _map);
                map->prog = NULL;
                prog->aux->cgroup_storage[stype] = NULL;
        }
        spin_unlock_bh(&map->lock);
}

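/*
 * Lifetime of a storage item, roughly (as of this snapshot): the
 * verifier binds the map to a program at load time (assign above); when
 * the program is attached to a cgroup, one storage item per (cgroup,
 * attach_type) pair is allocated and linked into the map (alloc + link
 * below); on detach it is unlinked and freed again.
 */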
/*
 * Allocate a storage item for @prog's map of type @stype, charging its
 * full footprint against the map's memlock limit.
 */
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
                                        enum bpf_cgroup_storage_type stype)
{
        struct bpf_cgroup_storage *storage;
        struct bpf_map *map;
        u32 pages;

        map = prog->aux->cgroup_storage[stype];
        if (!map)
                return NULL;

        pages = round_up(sizeof(struct bpf_cgroup_storage) +
                         sizeof(struct bpf_storage_buffer) +
                         map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
        if (bpf_map_charge_memlock(map, pages))
                return ERR_PTR(-EPERM);

        storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
                               __GFP_ZERO | GFP_USER, map->numa_node);
        if (!storage) {
                bpf_map_uncharge_memlock(map, pages);
                return ERR_PTR(-ENOMEM);
        }

        storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
                                    map->value_size, __GFP_ZERO | GFP_USER,
                                    map->numa_node);
        if (!storage->buf) {
                bpf_map_uncharge_memlock(map, pages);
                kfree(storage);
                return ERR_PTR(-ENOMEM);
        }

        storage->map = (struct bpf_cgroup_storage_map *)map;

        return storage;
}

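/*
 * Free a storage item, uncharging the same footprint that
 * bpf_cgroup_storage_alloc() charged; the two computations must stay in
 * sync.  Freeing is RCU-delayed because a concurrent lookup may still
 * hold a pointer to the buffer.
 */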
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
        u32 pages;
        struct bpf_map *map;

        if (!storage)
                return;

        map = &storage->map->map;
        pages = round_up(sizeof(struct bpf_cgroup_storage) +
                         sizeof(struct bpf_storage_buffer) +
                         map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
        bpf_map_uncharge_memlock(map, pages);

        kfree_rcu(storage->buf, rcu);
        kfree_rcu(storage, rcu);
}

/*
 * Called on attach: key the storage by (cgroup, attach_type) and make
 * it visible to lookups and iteration on the map.
 */
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
                             struct cgroup *cgroup,
                             enum bpf_attach_type type)
{
        struct bpf_cgroup_storage_map *map;

        if (!storage)
                return;

        storage->key.attach_type = type;
        storage->key.cgroup_inode_id = cgroup->kn->id.id;

        map = storage->map;

        spin_lock_bh(&map->lock);
        WARN_ON(cgroup_storage_insert(map, storage));
        list_add(&storage->list, &map->list);
        spin_unlock_bh(&map->lock);
}

/* Called on detach: remove the storage from the map again. */
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
        struct bpf_cgroup_storage_map *map;
        struct rb_root *root;

        if (!storage)
                return;

        map = storage->map;

        spin_lock_bh(&map->lock);
        root = &map->root;
        rb_erase(&storage->node, root);

        list_del(&storage->list);
        spin_unlock_bh(&map->lock);
}

#endif	/* CONFIG_CGROUP_BPF */