net/openvswitch/flow_netlink.c
1 /*
2  * Copyright (c) 2007-2017 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include "flow.h"
22 #include "datapath.h"
23 #include <linux/uaccess.h>
24 #include <linux/netdevice.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <net/llc_pdu.h>
29 #include <linux/kernel.h>
30 #include <linux/jhash.h>
31 #include <linux/jiffies.h>
32 #include <linux/llc.h>
33 #include <linux/module.h>
34 #include <linux/in.h>
35 #include <linux/rcupdate.h>
36 #include <linux/if_arp.h>
37 #include <linux/ip.h>
38 #include <linux/ipv6.h>
39 #include <linux/sctp.h>
40 #include <linux/tcp.h>
41 #include <linux/udp.h>
42 #include <linux/icmp.h>
43 #include <linux/icmpv6.h>
44 #include <linux/rculist.h>
45 #include <net/geneve.h>
46 #include <net/ip.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/mpls.h>
50 #include <net/vxlan.h>
51 #include <net/tun_proto.h>
52 #include <net/erspan.h>
53
54 #include "flow_netlink.h"
55
56 struct ovs_len_tbl {
57         int len;
58         const struct ovs_len_tbl *next;
59 };
60
61 #define OVS_ATTR_NESTED -1
62 #define OVS_ATTR_VARIABLE -2
63
64 static bool actions_may_change_flow(const struct nlattr *actions)
65 {
66         struct nlattr *nla;
67         int rem;
68
69         nla_for_each_nested(nla, actions, rem) {
70                 u16 action = nla_type(nla);
71
72                 switch (action) {
73                 case OVS_ACTION_ATTR_OUTPUT:
74                 case OVS_ACTION_ATTR_RECIRC:
75                 case OVS_ACTION_ATTR_TRUNC:
76                 case OVS_ACTION_ATTR_USERSPACE:
77                         break;
78
79                 case OVS_ACTION_ATTR_CT:
80                 case OVS_ACTION_ATTR_CT_CLEAR:
81                 case OVS_ACTION_ATTR_HASH:
82                 case OVS_ACTION_ATTR_POP_ETH:
83                 case OVS_ACTION_ATTR_POP_MPLS:
84                 case OVS_ACTION_ATTR_POP_NSH:
85                 case OVS_ACTION_ATTR_POP_VLAN:
86                 case OVS_ACTION_ATTR_PUSH_ETH:
87                 case OVS_ACTION_ATTR_PUSH_MPLS:
88                 case OVS_ACTION_ATTR_PUSH_NSH:
89                 case OVS_ACTION_ATTR_PUSH_VLAN:
90                 case OVS_ACTION_ATTR_SAMPLE:
91                 case OVS_ACTION_ATTR_SET:
92                 case OVS_ACTION_ATTR_SET_MASKED:
93                 case OVS_ACTION_ATTR_METER:
94                 default:
95                         return true;
96                 }
97         }
98         return false;
99 }
100
101 static void update_range(struct sw_flow_match *match,
102                          size_t offset, size_t size, bool is_mask)
103 {
104         struct sw_flow_key_range *range;
105         size_t start = rounddown(offset, sizeof(long));
106         size_t end = roundup(offset + size, sizeof(long));
107
108         if (!is_mask)
109                 range = &match->range;
110         else
111                 range = &match->mask->range;
112
113         if (range->start == range->end) {
114                 range->start = start;
115                 range->end = end;
116                 return;
117         }
118
119         if (range->start > start)
120                 range->start = start;
121
122         if (range->end < end)
123                 range->end = end;
124 }
125
126 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
127         do { \
128                 update_range(match, offsetof(struct sw_flow_key, field),    \
129                              sizeof((match)->key->field), is_mask);         \
130                 if (is_mask)                                                \
131                         (match)->mask->key.field = value;                   \
132                 else                                                        \
133                         (match)->key->field = value;                        \
134         } while (0)
135
136 #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)     \
137         do {                                                                \
138                 update_range(match, offset, len, is_mask);                  \
139                 if (is_mask)                                                \
140                         memcpy((u8 *)&(match)->mask->key + offset, value_p, \
141                                len);                                       \
142                 else                                                        \
143                         memcpy((u8 *)(match)->key + offset, value_p, len);  \
144         } while (0)
145
146 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)               \
147         SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
148                                   value_p, len, is_mask)
149
150 #define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)              \
151         do {                                                                \
152                 update_range(match, offsetof(struct sw_flow_key, field),    \
153                              sizeof((match)->key->field), is_mask);         \
154                 if (is_mask)                                                \
155                         memset((u8 *)&(match)->mask->key.field, value,      \
156                                sizeof((match)->mask->key.field));           \
157                 else                                                        \
158                         memset((u8 *)&(match)->key->field, value,           \
159                                sizeof((match)->key->field));                \
160         } while (0)
161
162 static bool match_validate(const struct sw_flow_match *match,
163                            u64 key_attrs, u64 mask_attrs, bool log)
164 {
165         u64 key_expected = 0;
166         u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
167
168         /* The following mask attributes are allowed only if they
169          * pass the validation tests. */
170         mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
171                         | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
172                         | (1 << OVS_KEY_ATTR_IPV6)
173                         | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
174                         | (1 << OVS_KEY_ATTR_TCP)
175                         | (1 << OVS_KEY_ATTR_TCP_FLAGS)
176                         | (1 << OVS_KEY_ATTR_UDP)
177                         | (1 << OVS_KEY_ATTR_SCTP)
178                         | (1 << OVS_KEY_ATTR_ICMP)
179                         | (1 << OVS_KEY_ATTR_ICMPV6)
180                         | (1 << OVS_KEY_ATTR_ARP)
181                         | (1 << OVS_KEY_ATTR_ND)
182                         | (1 << OVS_KEY_ATTR_MPLS)
183                         | (1 << OVS_KEY_ATTR_NSH));
184
185         /* Always allowed mask fields. */
186         mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
187                        | (1 << OVS_KEY_ATTR_IN_PORT)
188                        | (1 << OVS_KEY_ATTR_ETHERTYPE));
189
190         /* Check key attributes. */
191         if (match->key->eth.type == htons(ETH_P_ARP)
192                         || match->key->eth.type == htons(ETH_P_RARP)) {
193                 key_expected |= 1 << OVS_KEY_ATTR_ARP;
194                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
195                         mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
196         }
197
198         if (eth_p_mpls(match->key->eth.type)) {
199                 key_expected |= 1 << OVS_KEY_ATTR_MPLS;
200                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
201                         mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
202         }
203
204         if (match->key->eth.type == htons(ETH_P_IP)) {
205                 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
206                 if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
207                         mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
208                         mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
209                 }
210
211                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
212                         if (match->key->ip.proto == IPPROTO_UDP) {
213                                 key_expected |= 1 << OVS_KEY_ATTR_UDP;
214                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
215                                         mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
216                         }
217
218                         if (match->key->ip.proto == IPPROTO_SCTP) {
219                                 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
220                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
221                                         mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
222                         }
223
224                         if (match->key->ip.proto == IPPROTO_TCP) {
225                                 key_expected |= 1 << OVS_KEY_ATTR_TCP;
226                                 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
227                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
228                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
229                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
230                                 }
231                         }
232
233                         if (match->key->ip.proto == IPPROTO_ICMP) {
234                                 key_expected |= 1 << OVS_KEY_ATTR_ICMP;
235                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
236                                         mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
237                         }
238                 }
239         }
240
241         if (match->key->eth.type == htons(ETH_P_IPV6)) {
242                 key_expected |= 1 << OVS_KEY_ATTR_IPV6;
243                 if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
244                         mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
245                         mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
246                 }
247
248                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
249                         if (match->key->ip.proto == IPPROTO_UDP) {
250                                 key_expected |= 1 << OVS_KEY_ATTR_UDP;
251                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
252                                         mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
253                         }
254
255                         if (match->key->ip.proto == IPPROTO_SCTP) {
256                                 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
257                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
258                                         mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
259                         }
260
261                         if (match->key->ip.proto == IPPROTO_TCP) {
262                                 key_expected |= 1 << OVS_KEY_ATTR_TCP;
263                                 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
264                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
265                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
266                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
267                                 }
268                         }
269
270                         if (match->key->ip.proto == IPPROTO_ICMPV6) {
271                                 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
272                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
273                                         mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
274
275                                 if (match->key->tp.src ==
276                                                 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
277                                     match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
278                                         key_expected |= 1 << OVS_KEY_ATTR_ND;
279                                         /* Original direction conntrack tuple
280                                          * uses the same space as the ND fields
281                                          * in the key, so both are not allowed
282                                          * at the same time.
283                                          */
284                                         mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
285                                         if (match->mask && (match->mask->key.tp.src == htons(0xff)))
286                                                 mask_allowed |= 1 << OVS_KEY_ATTR_ND;
287                                 }
288                         }
289                 }
290         }
291
292         if (match->key->eth.type == htons(ETH_P_NSH)) {
293                 key_expected |= 1 << OVS_KEY_ATTR_NSH;
294                 if (match->mask &&
295                     match->mask->key.eth.type == htons(0xffff)) {
296                         mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
297                 }
298         }
299
300         if ((key_attrs & key_expected) != key_expected) {
301                 /* Key attributes check failed. */
302                 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
303                           (unsigned long long)key_attrs,
304                           (unsigned long long)key_expected);
305                 return false;
306         }
307
308         if ((mask_attrs & mask_allowed) != mask_attrs) {
309                 /* Mask attributes check failed. */
310                 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
311                           (unsigned long long)mask_attrs,
312                           (unsigned long long)mask_allowed);
313                 return false;
314         }
315
316         return true;
317 }
318
319 size_t ovs_tun_key_attr_size(void)
320 {
321         /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
322          * updating this function.
323          */
324         return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
325                 + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
326                 + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
327                 + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
328                 + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
329                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
330                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
331                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
332                 + nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
333                 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
334                  * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS are mutually exclusive with
335                  * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
336                  */
337                 + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
338                 + nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
339 }
340
341 static size_t ovs_nsh_key_attr_size(void)
342 {
343         /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
344          * updating this function.
345          */
346         return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
347                 /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
348                  * mutually exclusive, so the bigger one can cover
349                  * the smaller one.
350                  */
351                 + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
352 }
353
354 size_t ovs_key_attr_size(void)
355 {
356         /* Whenever adding new OVS_KEY_ FIELDS, we should consider
357          * updating this function.
358          */
359         BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
360
361         return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
362                 + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
363                   + ovs_tun_key_attr_size()
364                 + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
365                 + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
366                 + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
367                 + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
368                 + nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
369                 + nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
370                 + nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
371                 + nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
372                 + nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
373                 + nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
374                   + ovs_nsh_key_attr_size()
375                 + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
376                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
377                 + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
378                 + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
379                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
380                 + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
381                 + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
382                 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
383 }
384
385 static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
386         [OVS_VXLAN_EXT_GBP]         = { .len = sizeof(u32) },
387 };
388
389 static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
390         [OVS_TUNNEL_KEY_ATTR_ID]            = { .len = sizeof(u64) },
391         [OVS_TUNNEL_KEY_ATTR_IPV4_SRC]      = { .len = sizeof(u32) },
392         [OVS_TUNNEL_KEY_ATTR_IPV4_DST]      = { .len = sizeof(u32) },
393         [OVS_TUNNEL_KEY_ATTR_TOS]           = { .len = 1 },
394         [OVS_TUNNEL_KEY_ATTR_TTL]           = { .len = 1 },
395         [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
396         [OVS_TUNNEL_KEY_ATTR_CSUM]          = { .len = 0 },
397         [OVS_TUNNEL_KEY_ATTR_TP_SRC]        = { .len = sizeof(u16) },
398         [OVS_TUNNEL_KEY_ATTR_TP_DST]        = { .len = sizeof(u16) },
399         [OVS_TUNNEL_KEY_ATTR_OAM]           = { .len = 0 },
400         [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
401         [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
402                                                 .next = ovs_vxlan_ext_key_lens },
403         [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
404         [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
405         [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
406 };
407
408 static const struct ovs_len_tbl
409 ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
410         [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
411         [OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
412         [OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
413 };
414
415 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
416 static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
417         [OVS_KEY_ATTR_ENCAP]     = { .len = OVS_ATTR_NESTED },
418         [OVS_KEY_ATTR_PRIORITY]  = { .len = sizeof(u32) },
419         [OVS_KEY_ATTR_IN_PORT]   = { .len = sizeof(u32) },
420         [OVS_KEY_ATTR_SKB_MARK]  = { .len = sizeof(u32) },
421         [OVS_KEY_ATTR_ETHERNET]  = { .len = sizeof(struct ovs_key_ethernet) },
422         [OVS_KEY_ATTR_VLAN]      = { .len = sizeof(__be16) },
423         [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
424         [OVS_KEY_ATTR_IPV4]      = { .len = sizeof(struct ovs_key_ipv4) },
425         [OVS_KEY_ATTR_IPV6]      = { .len = sizeof(struct ovs_key_ipv6) },
426         [OVS_KEY_ATTR_TCP]       = { .len = sizeof(struct ovs_key_tcp) },
427         [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
428         [OVS_KEY_ATTR_UDP]       = { .len = sizeof(struct ovs_key_udp) },
429         [OVS_KEY_ATTR_SCTP]      = { .len = sizeof(struct ovs_key_sctp) },
430         [OVS_KEY_ATTR_ICMP]      = { .len = sizeof(struct ovs_key_icmp) },
431         [OVS_KEY_ATTR_ICMPV6]    = { .len = sizeof(struct ovs_key_icmpv6) },
432         [OVS_KEY_ATTR_ARP]       = { .len = sizeof(struct ovs_key_arp) },
433         [OVS_KEY_ATTR_ND]        = { .len = sizeof(struct ovs_key_nd) },
434         [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
435         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
436         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
437                                      .next = ovs_tunnel_key_lens, },
438         [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
439         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
440         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
441         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
442         [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
443         [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
444                 .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
445         [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
446                 .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
447         [OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
448                                      .next = ovs_nsh_key_attr_lens, },
449 };
450
451 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
452 {
453         return expected_len == attr_len ||
454                expected_len == OVS_ATTR_NESTED ||
455                expected_len == OVS_ATTR_VARIABLE;
456 }
457
458 static bool is_all_zero(const u8 *fp, size_t size)
459 {
460         int i;
461
462         if (!fp)
463                 return false;
464
465         for (i = 0; i < size; i++)
466                 if (fp[i])
467                         return false;
468
469         return true;
470 }
471
472 static int __parse_flow_nlattrs(const struct nlattr *attr,
473                                 const struct nlattr *a[],
474                                 u64 *attrsp, bool log, bool nz)
475 {
476         const struct nlattr *nla;
477         u64 attrs;
478         int rem;
479
480         attrs = *attrsp;
481         nla_for_each_nested(nla, attr, rem) {
482                 u16 type = nla_type(nla);
483                 int expected_len;
484
485                 if (type > OVS_KEY_ATTR_MAX) {
486                         OVS_NLERR(log, "Key type %d is out of range max %d",
487                                   type, OVS_KEY_ATTR_MAX);
488                         return -EINVAL;
489                 }
490
491                 if (attrs & (1 << type)) {
492                         OVS_NLERR(log, "Duplicate key (type %d).", type);
493                         return -EINVAL;
494                 }
495
496                 expected_len = ovs_key_lens[type].len;
497                 if (!check_attr_len(nla_len(nla), expected_len)) {
498                         OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
499                                   type, nla_len(nla), expected_len);
500                         return -EINVAL;
501                 }
502
503                 if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
504                         attrs |= 1 << type;
505                         a[type] = nla;
506                 }
507         }
508         if (rem) {
509                 OVS_NLERR(log, "Message has %d unknown bytes.", rem);
510                 return -EINVAL;
511         }
512
513         *attrsp = attrs;
514         return 0;
515 }
516
517 static int parse_flow_mask_nlattrs(const struct nlattr *attr,
518                                    const struct nlattr *a[], u64 *attrsp,
519                                    bool log)
520 {
521         return __parse_flow_nlattrs(attr, a, attrsp, log, true);
522 }
523
524 int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
525                        u64 *attrsp, bool log)
526 {
527         return __parse_flow_nlattrs(attr, a, attrsp, log, false);
528 }
529
530 static int genev_tun_opt_from_nlattr(const struct nlattr *a,
531                                      struct sw_flow_match *match, bool is_mask,
532                                      bool log)
533 {
534         unsigned long opt_key_offset;
535
536         if (nla_len(a) > sizeof(match->key->tun_opts)) {
537                 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
538                           nla_len(a), sizeof(match->key->tun_opts));
539                 return -EINVAL;
540         }
541
542         if (nla_len(a) % 4 != 0) {
543                 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
544                           nla_len(a));
545                 return -EINVAL;
546         }
547
548         /* We need to record the length of the options passed
549          * down; otherwise, packets with the same format but
550          * additional options will be silently matched.
551          */
552         if (!is_mask) {
553                 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
554                                 false);
555         } else {
556                 /* This is somewhat unusual because it looks at
557                  * both the key and mask while parsing the
558                  * attributes (and by extension assumes the key
559                  * is parsed first). Normally, we would verify
560                  * that each is the correct length and that the
561                  * attributes line up in the validate function.
562                  * However, that is difficult because this is
563                  * variable length and we won't have the
564                  * information later.
565                  */
566                 if (match->key->tun_opts_len != nla_len(a)) {
567                         OVS_NLERR(log, "Geneve option len %d != mask len %d",
568                                   match->key->tun_opts_len, nla_len(a));
569                         return -EINVAL;
570                 }
571
572                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
573         }
574
575         opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
576         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
577                                   nla_len(a), is_mask);
578         return 0;
579 }
580
581 static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
582                                      struct sw_flow_match *match, bool is_mask,
583                                      bool log)
584 {
585         struct nlattr *a;
586         int rem;
587         unsigned long opt_key_offset;
588         struct vxlan_metadata opts;
589
590         BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
591
592         memset(&opts, 0, sizeof(opts));
593         nla_for_each_nested(a, attr, rem) {
594                 int type = nla_type(a);
595
596                 if (type > OVS_VXLAN_EXT_MAX) {
597                         OVS_NLERR(log, "VXLAN extension %d out of range max %d",
598                                   type, OVS_VXLAN_EXT_MAX);
599                         return -EINVAL;
600                 }
601
602                 if (!check_attr_len(nla_len(a),
603                                     ovs_vxlan_ext_key_lens[type].len)) {
604                         OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
605                                   type, nla_len(a),
606                                   ovs_vxlan_ext_key_lens[type].len);
607                         return -EINVAL;
608                 }
609
610                 switch (type) {
611                 case OVS_VXLAN_EXT_GBP:
612                         opts.gbp = nla_get_u32(a);
613                         break;
614                 default:
615                         OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
616                                   type);
617                         return -EINVAL;
618                 }
619         }
620         if (rem) {
621                 OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
622                           rem);
623                 return -EINVAL;
624         }
625
626         if (!is_mask)
627                 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
628         else
629                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
630
631         opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
632         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
633                                   is_mask);
634         return 0;
635 }
636
637 static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
638                                       struct sw_flow_match *match, bool is_mask,
639                                       bool log)
640 {
641         unsigned long opt_key_offset;
642
643         BUILD_BUG_ON(sizeof(struct erspan_metadata) >
644                      sizeof(match->key->tun_opts));
645
646         if (nla_len(a) > sizeof(match->key->tun_opts)) {
647                 OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
648                           nla_len(a), sizeof(match->key->tun_opts));
649                 return -EINVAL;
650         }
651
652         if (!is_mask)
653                 SW_FLOW_KEY_PUT(match, tun_opts_len,
654                                 sizeof(struct erspan_metadata), false);
655         else
656                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
657
658         opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
659         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
660                                   nla_len(a), is_mask);
661         return 0;
662 }
663
664 static int ip_tun_from_nlattr(const struct nlattr *attr,
665                               struct sw_flow_match *match, bool is_mask,
666                               bool log)
667 {
668         bool ttl = false, ipv4 = false, ipv6 = false;
669         __be16 tun_flags = 0;
670         int opts_type = 0;
671         struct nlattr *a;
672         int rem;
673
674         nla_for_each_nested(a, attr, rem) {
675                 int type = nla_type(a);
676                 int err;
677
678                 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
679                         OVS_NLERR(log, "Tunnel attr %d out of range max %d",
680                                   type, OVS_TUNNEL_KEY_ATTR_MAX);
681                         return -EINVAL;
682                 }
683
684                 if (!check_attr_len(nla_len(a),
685                                     ovs_tunnel_key_lens[type].len)) {
686                         OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
687                                   type, nla_len(a), ovs_tunnel_key_lens[type].len);
688                         return -EINVAL;
689                 }
690
691                 switch (type) {
692                 case OVS_TUNNEL_KEY_ATTR_ID:
693                         SW_FLOW_KEY_PUT(match, tun_key.tun_id,
694                                         nla_get_be64(a), is_mask);
695                         tun_flags |= TUNNEL_KEY;
696                         break;
697                 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
698                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
699                                         nla_get_in_addr(a), is_mask);
700                         ipv4 = true;
701                         break;
702                 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
703                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
704                                         nla_get_in_addr(a), is_mask);
705                         ipv4 = true;
706                         break;
707                 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
708                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
709                                         nla_get_in6_addr(a), is_mask);
710                         ipv6 = true;
711                         break;
712                 case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
713                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
714                                         nla_get_in6_addr(a), is_mask);
715                         ipv6 = true;
716                         break;
717                 case OVS_TUNNEL_KEY_ATTR_TOS:
718                         SW_FLOW_KEY_PUT(match, tun_key.tos,
719                                         nla_get_u8(a), is_mask);
720                         break;
721                 case OVS_TUNNEL_KEY_ATTR_TTL:
722                         SW_FLOW_KEY_PUT(match, tun_key.ttl,
723                                         nla_get_u8(a), is_mask);
724                         ttl = true;
725                         break;
726                 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
727                         tun_flags |= TUNNEL_DONT_FRAGMENT;
728                         break;
729                 case OVS_TUNNEL_KEY_ATTR_CSUM:
730                         tun_flags |= TUNNEL_CSUM;
731                         break;
732                 case OVS_TUNNEL_KEY_ATTR_TP_SRC:
733                         SW_FLOW_KEY_PUT(match, tun_key.tp_src,
734                                         nla_get_be16(a), is_mask);
735                         break;
736                 case OVS_TUNNEL_KEY_ATTR_TP_DST:
737                         SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
738                                         nla_get_be16(a), is_mask);
739                         break;
740                 case OVS_TUNNEL_KEY_ATTR_OAM:
741                         tun_flags |= TUNNEL_OAM;
742                         break;
743                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
744                         if (opts_type) {
745                                 OVS_NLERR(log, "Multiple metadata blocks provided");
746                                 return -EINVAL;
747                         }
748
749                         err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
750                         if (err)
751                                 return err;
752
753                         tun_flags |= TUNNEL_GENEVE_OPT;
754                         opts_type = type;
755                         break;
756                 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
757                         if (opts_type) {
758                                 OVS_NLERR(log, "Multiple metadata blocks provided");
759                                 return -EINVAL;
760                         }
761
762                         err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
763                         if (err)
764                                 return err;
765
766                         tun_flags |= TUNNEL_VXLAN_OPT;
767                         opts_type = type;
768                         break;
769                 case OVS_TUNNEL_KEY_ATTR_PAD:
770                         break;
771                 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
772                         if (opts_type) {
773                                 OVS_NLERR(log, "Multiple metadata blocks provided");
774                                 return -EINVAL;
775                         }
776
777                         err = erspan_tun_opt_from_nlattr(a, match, is_mask,
778                                                          log);
779                         if (err)
780                                 return err;
781
782                         tun_flags |= TUNNEL_ERSPAN_OPT;
783                         opts_type = type;
784                         break;
785                 default:
786                         OVS_NLERR(log, "Unknown IP tunnel attribute %d",
787                                   type);
788                         return -EINVAL;
789                 }
790         }
791
792         SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
793         if (is_mask)
794                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
795         else
796                 SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
797                                 false);
798
799         if (rem > 0) {
800                 OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
801                           rem);
802                 return -EINVAL;
803         }
804
805         if (ipv4 && ipv6) {
806                 OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
807                 return -EINVAL;
808         }
809
810         if (!is_mask) {
811                 if (!ipv4 && !ipv6) {
812                         OVS_NLERR(log, "IP tunnel dst address not specified");
813                         return -EINVAL;
814                 }
815                 if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
816                         OVS_NLERR(log, "IPv4 tunnel dst address is zero");
817                         return -EINVAL;
818                 }
819                 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
820                         OVS_NLERR(log, "IPv6 tunnel dst address is zero");
821                         return -EINVAL;
822                 }
823
824                 if (!ttl) {
825                         OVS_NLERR(log, "IP tunnel TTL not specified.");
826                         return -EINVAL;
827                 }
828         }
829
830         return opts_type;
831 }
832
833 static int vxlan_opt_to_nlattr(struct sk_buff *skb,
834                                const void *tun_opts, int swkey_tun_opts_len)
835 {
836         const struct vxlan_metadata *opts = tun_opts;
837         struct nlattr *nla;
838
839         nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
840         if (!nla)
841                 return -EMSGSIZE;
842
843         if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
844                 return -EMSGSIZE;
845
846         nla_nest_end(skb, nla);
847         return 0;
848 }
849
850 static int __ip_tun_to_nlattr(struct sk_buff *skb,
851                               const struct ip_tunnel_key *output,
852                               const void *tun_opts, int swkey_tun_opts_len,
853                               unsigned short tun_proto)
854 {
855         if (output->tun_flags & TUNNEL_KEY &&
856             nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
857                          OVS_TUNNEL_KEY_ATTR_PAD))
858                 return -EMSGSIZE;
859         switch (tun_proto) {
860         case AF_INET:
861                 if (output->u.ipv4.src &&
862                     nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
863                                     output->u.ipv4.src))
864                         return -EMSGSIZE;
865                 if (output->u.ipv4.dst &&
866                     nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
867                                     output->u.ipv4.dst))
868                         return -EMSGSIZE;
869                 break;
870         case AF_INET6:
871                 if (!ipv6_addr_any(&output->u.ipv6.src) &&
872                     nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
873                                      &output->u.ipv6.src))
874                         return -EMSGSIZE;
875                 if (!ipv6_addr_any(&output->u.ipv6.dst) &&
876                     nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
877                                      &output->u.ipv6.dst))
878                         return -EMSGSIZE;
879                 break;
880         }
881         if (output->tos &&
882             nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
883                 return -EMSGSIZE;
884         if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
885                 return -EMSGSIZE;
886         if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
887             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
888                 return -EMSGSIZE;
889         if ((output->tun_flags & TUNNEL_CSUM) &&
890             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
891                 return -EMSGSIZE;
892         if (output->tp_src &&
893             nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
894                 return -EMSGSIZE;
895         if (output->tp_dst &&
896             nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
897                 return -EMSGSIZE;
898         if ((output->tun_flags & TUNNEL_OAM) &&
899             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
900                 return -EMSGSIZE;
901         if (swkey_tun_opts_len) {
902                 if (output->tun_flags & TUNNEL_GENEVE_OPT &&
903                     nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
904                             swkey_tun_opts_len, tun_opts))
905                         return -EMSGSIZE;
906                 else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
907                          vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
908                         return -EMSGSIZE;
909                 else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
910                          nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
911                                  swkey_tun_opts_len, tun_opts))
912                         return -EMSGSIZE;
913         }
914
915         return 0;
916 }
917
918 static int ip_tun_to_nlattr(struct sk_buff *skb,
919                             const struct ip_tunnel_key *output,
920                             const void *tun_opts, int swkey_tun_opts_len,
921                             unsigned short tun_proto)
922 {
923         struct nlattr *nla;
924         int err;
925
926         nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
927         if (!nla)
928                 return -EMSGSIZE;
929
930         err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
931                                  tun_proto);
932         if (err)
933                 return err;
934
935         nla_nest_end(skb, nla);
936         return 0;
937 }
938
939 int ovs_nla_put_tunnel_info(struct sk_buff *skb,
940                             struct ip_tunnel_info *tun_info)
941 {
942         return __ip_tun_to_nlattr(skb, &tun_info->key,
943                                   ip_tunnel_info_opts(tun_info),
944                                   tun_info->options_len,
945                                   ip_tunnel_info_af(tun_info));
946 }
947
948 static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
949                                     const struct nlattr *a[],
950                                     bool is_mask, bool inner)
951 {
952         __be16 tci = 0;
953         __be16 tpid = 0;
954
955         if (a[OVS_KEY_ATTR_VLAN])
956                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
957
958         if (a[OVS_KEY_ATTR_ETHERTYPE])
959                 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
960
961         if (likely(!inner)) {
962                 SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
963                 SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
964         } else {
965                 SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
966                 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
967         }
968         return 0;
969 }
970
971 static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
972                                       u64 key_attrs, bool inner,
973                                       const struct nlattr **a, bool log)
974 {
975         __be16 tci = 0;
976
977         if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
978               (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
979                eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
980                 /* Not a VLAN. */
981                 return 0;
982         }
983
984         if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
985               (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
986                 OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
987                 return -EINVAL;
988         }
989
990         if (a[OVS_KEY_ATTR_VLAN])
991                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
992
993         if (!(tci & htons(VLAN_TAG_PRESENT))) {
994                 if (tci) {
995                         OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
996                                   (inner) ? "C-VLAN" : "VLAN");
997                         return -EINVAL;
998                 } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
999                         /* Corner case for truncated VLAN header. */
1000                         OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
1001                                   (inner) ? "C-VLAN" : "VLAN");
1002                         return -EINVAL;
1003                 }
1004         }
1005
1006         return 1;
1007 }
1008
1009 static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
1010                                            u64 key_attrs, bool inner,
1011                                            const struct nlattr **a, bool log)
1012 {
1013         __be16 tci = 0;
1014         __be16 tpid = 0;
1015         bool encap_valid = !!(match->key->eth.vlan.tci &
1016                               htons(VLAN_TAG_PRESENT));
1017         bool i_encap_valid = !!(match->key->eth.cvlan.tci &
1018                                 htons(VLAN_TAG_PRESENT));
1019
1020         if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
1021                 /* Not a VLAN. */
1022                 return 0;
1023         }
1024
1025         if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
1026                 OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
1027                           (inner) ? "C-VLAN" : "VLAN");
1028                 return -EINVAL;
1029         }
1030
1031         if (a[OVS_KEY_ATTR_VLAN])
1032                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1033
1034         if (a[OVS_KEY_ATTR_ETHERTYPE])
1035                 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1036
1037         if (tpid != htons(0xffff)) {
1038                 OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
1039                           (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
1040                 return -EINVAL;
1041         }
1042         if (!(tci & htons(VLAN_TAG_PRESENT))) {
1043                 OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
1044                           (inner) ? "C-VLAN" : "VLAN");
1045                 return -EINVAL;
1046         }
1047
1048         return 1;
1049 }
1050
1051 static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
1052                                      u64 *key_attrs, bool inner,
1053                                      const struct nlattr **a, bool is_mask,
1054                                      bool log)
1055 {
1056         int err;
1057         const struct nlattr *encap;
1058
1059         if (!is_mask)
1060                 err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
1061                                                  a, log);
1062         else
1063                 err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
1064                                                       a, log);
1065         if (err <= 0)
1066                 return err;
1067
1068         err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
1069         if (err)
1070                 return err;
1071
1072         *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1073         *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1074         *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1075
1076         encap = a[OVS_KEY_ATTR_ENCAP];
1077
1078         if (!is_mask)
1079                 err = parse_flow_nlattrs(encap, a, key_attrs, log);
1080         else
1081                 err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
1082
1083         return err;
1084 }
1085
1086 static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
1087                                    u64 *key_attrs, const struct nlattr **a,
1088                                    bool is_mask, bool log)
1089 {
1090         int err;
1091         bool encap_valid = false;
1092
1093         err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
1094                                         is_mask, log);
1095         if (err)
1096                 return err;
1097
1098         encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
1099         if (encap_valid) {
1100                 err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
1101                                                 is_mask, log);
1102                 if (err)
1103                         return err;
1104         }
1105
1106         return 0;
1107 }
1108
1109 static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
1110                                        u64 *attrs, const struct nlattr **a,
1111                                        bool is_mask, bool log)
1112 {
1113         __be16 eth_type;
1114
1115         eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1116         if (is_mask) {
1117                 /* Always exact match EtherType. */
1118                 eth_type = htons(0xffff);
1119         } else if (!eth_proto_is_802_3(eth_type)) {
1120                 OVS_NLERR(log, "EtherType %x is less than min %x",
1121                                 ntohs(eth_type), ETH_P_802_3_MIN);
1122                 return -EINVAL;
1123         }
1124
1125         SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1126         *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1127         return 0;
1128 }
1129
1130 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1131                                  u64 *attrs, const struct nlattr **a,
1132                                  bool is_mask, bool log)
1133 {
1134         u8 mac_proto = MAC_PROTO_ETHERNET;
1135
1136         if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
1137                 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
1138
1139                 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
1140                 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
1141         }
1142
1143         if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
1144                 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
1145
1146                 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
1147                 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
1148         }
1149
1150         if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1151                 SW_FLOW_KEY_PUT(match, phy.priority,
1152                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1153                 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1154         }
1155
1156         if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1157                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1158
1159                 if (is_mask) {
1160                         in_port = 0xffffffff; /* Always exact match in_port. */
1161                 } else if (in_port >= DP_MAX_PORTS) {
1162                         OVS_NLERR(log, "Port %d exceeds max allowable %d",
1163                                   in_port, DP_MAX_PORTS);
1164                         return -EINVAL;
1165                 }
1166
1167                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1168                 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1169         } else if (!is_mask) {
1170                 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1171         }
1172
1173         if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1174                 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1175
1176                 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1177                 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1178         }
1179         if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1180                 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1181                                        is_mask, log) < 0)
1182                         return -EINVAL;
1183                 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1184         }
1185
1186         if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
1187             ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
1188                 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
1189
1190                 if (ct_state & ~CT_SUPPORTED_MASK) {
1191                         OVS_NLERR(log, "ct_state flags %08x unsupported",
1192                                   ct_state);
1193                         return -EINVAL;
1194                 }
1195
1196                 SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
1197                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
1198         }
1199         if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
1200             ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
1201                 u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
1202
1203                 SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
1204                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
1205         }
1206         if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
1207             ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
1208                 u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
1209
1210                 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
1211                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
1212         }
1213         if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
1214             ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
1215                 const struct ovs_key_ct_labels *cl;
1216
1217                 cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
1218                 SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
1219                                    sizeof(*cl), is_mask);
1220                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1221         }
1222         if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
1223                 const struct ovs_key_ct_tuple_ipv4 *ct;
1224
1225                 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
1226
1227                 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
1228                 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
1229                 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1230                 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1231                 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
1232                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
1233         }
1234         if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
1235                 const struct ovs_key_ct_tuple_ipv6 *ct;
1236
1237                 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
1238
1239                 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
1240                                    sizeof(match->key->ipv6.ct_orig.src),
1241                                    is_mask);
1242                 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
1243                                    sizeof(match->key->ipv6.ct_orig.dst),
1244                                    is_mask);
1245                 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1246                 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1247                 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
1248                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
1249         }
1250
1251         /* For layer 3 packets the Ethernet type is provided
1252          * and treated as metadata but no MAC addresses are provided.
1253          */
1254         if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
1255             (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
1256                 mac_proto = MAC_PROTO_NONE;
1257
1258         /* Always exact match mac_proto */
1259         SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
1260
1261         if (mac_proto == MAC_PROTO_NONE)
1262                 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
1263                                                    log);
1264
1265         return 0;
1266 }
1267
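     /* Build a wire-format NSH header in 'nh' from a nested OVS_NSH_KEY_ATTR_*
      * attribute, writing at most 'size' bytes (the base header plus MD1 or
      * MD2 metadata).
      */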
1268 int nsh_hdr_from_nlattr(const struct nlattr *attr,
1269                         struct nshhdr *nh, size_t size)
1270 {
1271         struct nlattr *a;
1272         int rem;
1273         u8 flags = 0;
1274         u8 ttl = 0;
1275         int mdlen = 0;
1276
1277         /* validate_nsh() has already checked this, so we needn't duplicate the check here
1278          */
1279         if (size < NSH_BASE_HDR_LEN)
1280                 return -ENOBUFS;
1281
1282         nla_for_each_nested(a, attr, rem) {
1283                 int type = nla_type(a);
1284
1285                 switch (type) {
1286                 case OVS_NSH_KEY_ATTR_BASE: {
1287                         const struct ovs_nsh_key_base *base = nla_data(a);
1288
1289                         flags = base->flags;
1290                         ttl = base->ttl;
1291                         nh->np = base->np;
1292                         nh->mdtype = base->mdtype;
1293                         nh->path_hdr = base->path_hdr;
1294                         break;
1295                 }
1296                 case OVS_NSH_KEY_ATTR_MD1:
1297                         mdlen = nla_len(a);
1298                         if (mdlen > size - NSH_BASE_HDR_LEN)
1299                                 return -ENOBUFS;
1300                         memcpy(&nh->md1, nla_data(a), mdlen);
1301                         break;
1302
1303                 case OVS_NSH_KEY_ATTR_MD2:
1304                         mdlen = nla_len(a);
1305                         if (mdlen > size - NSH_BASE_HDR_LEN)
1306                                 return -ENOBUFS;
1307                         memcpy(&nh->md2, nla_data(a), mdlen);
1308                         break;
1309
1310                 default:
1311                         return -EINVAL;
1312                 }
1313         }
1314
1315         /* NSH header length = NSH_BASE_HDR_LEN + mdlen */
1316         nh->ver_flags_ttl_len = 0;
1317         nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1318
1319         return 0;
1320 }
1321
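     /* Extract an NSH flow key and its mask from a masked set() argument; in
      * each nested attribute the mask values immediately follow the key values.
      */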
1322 int nsh_key_from_nlattr(const struct nlattr *attr,
1323                         struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1324 {
1325         struct nlattr *a;
1326         int rem;
1327
1328         /* validate_nsh() has already checked this, so we needn't duplicate the check here
1329          */
1330         nla_for_each_nested(a, attr, rem) {
1331                 int type = nla_type(a);
1332
1333                 switch (type) {
1334                 case OVS_NSH_KEY_ATTR_BASE: {
1335                         const struct ovs_nsh_key_base *base = nla_data(a);
1336                         const struct ovs_nsh_key_base *base_mask = base + 1;
1337
1338                         nsh->base = *base;
1339                         nsh_mask->base = *base_mask;
1340                         break;
1341                 }
1342                 case OVS_NSH_KEY_ATTR_MD1: {
1343                         const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1344                         const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1345
1346                         memcpy(nsh->context, md1->context, sizeof(*md1));
1347                         memcpy(nsh_mask->context, md1_mask->context,
1348                                sizeof(*md1_mask));
1349                         break;
1350                 }
1351                 case OVS_NSH_KEY_ATTR_MD2:
1352                         /* Not supported yet */
1353                         return -ENOTSUPP;
1354                 default:
1355                         return -EINVAL;
1356                 }
1357         }
1358
1359         return 0;
1360 }
1361
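     /* Validate the nested OVS_NSH_KEY_ATTR_* attributes and copy them into
      * 'match'.  When 'is_push_nsh' is set (push_nsh action argument), MD2
      * metadata is accepted and a base header plus metadata are required.
      */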
1362 static int nsh_key_put_from_nlattr(const struct nlattr *attr,
1363                                    struct sw_flow_match *match, bool is_mask,
1364                                    bool is_push_nsh, bool log)
1365 {
1366         struct nlattr *a;
1367         int rem;
1368         bool has_base = false;
1369         bool has_md1 = false;
1370         bool has_md2 = false;
1371         u8 mdtype = 0;
1372         int mdlen = 0;
1373
1374         if (WARN_ON(is_push_nsh && is_mask))
1375                 return -EINVAL;
1376
1377         nla_for_each_nested(a, attr, rem) {
1378                 int type = nla_type(a);
1379                 int i;
1380
1381                 if (type > OVS_NSH_KEY_ATTR_MAX) {
1382                         OVS_NLERR(log, "nsh attr %d is out of range max %d",
1383                                   type, OVS_NSH_KEY_ATTR_MAX);
1384                         return -EINVAL;
1385                 }
1386
1387                 if (!check_attr_len(nla_len(a),
1388                                     ovs_nsh_key_attr_lens[type].len)) {
1389                         OVS_NLERR(
1390                             log,
1391                             "nsh attr %d has unexpected len %d expected %d",
1392                             type,
1393                             nla_len(a),
1394                             ovs_nsh_key_attr_lens[type].len
1395                         );
1396                         return -EINVAL;
1397                 }
1398
1399                 switch (type) {
1400                 case OVS_NSH_KEY_ATTR_BASE: {
1401                         const struct ovs_nsh_key_base *base = nla_data(a);
1402
1403                         has_base = true;
1404                         mdtype = base->mdtype;
1405                         SW_FLOW_KEY_PUT(match, nsh.base.flags,
1406                                         base->flags, is_mask);
1407                         SW_FLOW_KEY_PUT(match, nsh.base.ttl,
1408                                         base->ttl, is_mask);
1409                         SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
1410                                         base->mdtype, is_mask);
1411                         SW_FLOW_KEY_PUT(match, nsh.base.np,
1412                                         base->np, is_mask);
1413                         SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
1414                                         base->path_hdr, is_mask);
1415                         break;
1416                 }
1417                 case OVS_NSH_KEY_ATTR_MD1: {
1418                         const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1419
1420                         has_md1 = true;
1421                         for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
1422                                 SW_FLOW_KEY_PUT(match, nsh.context[i],
1423                                                 md1->context[i], is_mask);
1424                         break;
1425                 }
1426                 case OVS_NSH_KEY_ATTR_MD2:
1427                         if (!is_push_nsh) /* Not supported MD type 2 yet */
1428                                 return -ENOTSUPP;
1429
1430                         has_md2 = true;
1431                         mdlen = nla_len(a);
1432                         if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
1433                                 OVS_NLERR(
1434                                     log,
1435                                     "Invalid MD length %d for MD type %d",
1436                                     mdlen,
1437                                     mdtype
1438                                 );
1439                                 return -EINVAL;
1440                         }
1441                         break;
1442                 default:
1443                         OVS_NLERR(log, "Unknown nsh attribute %d",
1444                                   type);
1445                         return -EINVAL;
1446                 }
1447         }
1448
1449         if (rem > 0) {
1450                 OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
1451                 return -EINVAL;
1452         }
1453
1454         if (has_md1 && has_md2) {
1455                 OVS_NLERR(
1456                     1,
1457                     "invalid nsh attribute: md1 and md2 are exclusive."
1458                 );
1459                 return -EINVAL;
1460         }
1461
1462         if (!is_mask) {
1463                 if ((has_md1 && mdtype != NSH_M_TYPE1) ||
1464                     (has_md2 && mdtype != NSH_M_TYPE2)) {
1465                         OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
1466                                   mdtype);
1467                         return -EINVAL;
1468                 }
1469
1470                 if (is_push_nsh &&
1471                     (!has_base || (!has_md1 && !has_md2))) {
1472                         OVS_NLERR(
1473                             1,
1474                             "push_nsh: missing base or metadata attributes"
1475                         );
1476                         return -EINVAL;
1477                 }
1478         }
1479
1480         return 0;
1481 }
1482
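     /* Fill in the flow key (or, when 'is_mask' is set, the flow mask) fields
      * of 'match' from the parsed OVS_KEY_ATTR_* attributes in 'a', clearing
      * each handled attribute from 'attrs' and rejecting any that remain.
      */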
1483 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1484                                 u64 attrs, const struct nlattr **a,
1485                                 bool is_mask, bool log)
1486 {
1487         int err;
1488
1489         err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
1490         if (err)
1491                 return err;
1492
1493         if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1494                 const struct ovs_key_ethernet *eth_key;
1495
1496                 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1497                 SW_FLOW_KEY_MEMCPY(match, eth.src,
1498                                 eth_key->eth_src, ETH_ALEN, is_mask);
1499                 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1500                                 eth_key->eth_dst, ETH_ALEN, is_mask);
1501                 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1502
1503                 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1504                         /* VLAN attribute is always parsed before getting here since it
1505                          * may occur multiple times.
1506                          */
1507                         OVS_NLERR(log, "VLAN attribute unexpected.");
1508                         return -EINVAL;
1509                 }
1510
1511                 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1512                         err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
1513                                                           log);
1514                         if (err)
1515                                 return err;
1516                 } else if (!is_mask) {
1517                         SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1518                 }
1519         } else if (!match->key->eth.type) {
1520                 OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
1521                 return -EINVAL;
1522         }
1523
1524         if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1525                 const struct ovs_key_ipv4 *ipv4_key;
1526
1527                 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1528                 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1529                         OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
1530                                   ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1531                         return -EINVAL;
1532                 }
1533                 SW_FLOW_KEY_PUT(match, ip.proto,
1534                                 ipv4_key->ipv4_proto, is_mask);
1535                 SW_FLOW_KEY_PUT(match, ip.tos,
1536                                 ipv4_key->ipv4_tos, is_mask);
1537                 SW_FLOW_KEY_PUT(match, ip.ttl,
1538                                 ipv4_key->ipv4_ttl, is_mask);
1539                 SW_FLOW_KEY_PUT(match, ip.frag,
1540                                 ipv4_key->ipv4_frag, is_mask);
1541                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1542                                 ipv4_key->ipv4_src, is_mask);
1543                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1544                                 ipv4_key->ipv4_dst, is_mask);
1545                 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1546         }
1547
1548         if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1549                 const struct ovs_key_ipv6 *ipv6_key;
1550
1551                 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1552                 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1553                         OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
1554                                   ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1555                         return -EINVAL;
1556                 }
1557
1558                 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
1559                         OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
1560                                   ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
1561                         return -EINVAL;
1562                 }
1563
1564                 SW_FLOW_KEY_PUT(match, ipv6.label,
1565                                 ipv6_key->ipv6_label, is_mask);
1566                 SW_FLOW_KEY_PUT(match, ip.proto,
1567                                 ipv6_key->ipv6_proto, is_mask);
1568                 SW_FLOW_KEY_PUT(match, ip.tos,
1569                                 ipv6_key->ipv6_tclass, is_mask);
1570                 SW_FLOW_KEY_PUT(match, ip.ttl,
1571                                 ipv6_key->ipv6_hlimit, is_mask);
1572                 SW_FLOW_KEY_PUT(match, ip.frag,
1573                                 ipv6_key->ipv6_frag, is_mask);
1574                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1575                                 ipv6_key->ipv6_src,
1576                                 sizeof(match->key->ipv6.addr.src),
1577                                 is_mask);
1578                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1579                                 ipv6_key->ipv6_dst,
1580                                 sizeof(match->key->ipv6.addr.dst),
1581                                 is_mask);
1582
1583                 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1584         }
1585
1586         if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1587                 const struct ovs_key_arp *arp_key;
1588
1589                 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1590                 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1591                         OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
1592                                   arp_key->arp_op);
1593                         return -EINVAL;
1594                 }
1595
1596                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1597                                 arp_key->arp_sip, is_mask);
1598                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1599                         arp_key->arp_tip, is_mask);
1600                 SW_FLOW_KEY_PUT(match, ip.proto,
1601                                 ntohs(arp_key->arp_op), is_mask);
1602                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1603                                 arp_key->arp_sha, ETH_ALEN, is_mask);
1604                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1605                                 arp_key->arp_tha, ETH_ALEN, is_mask);
1606
1607                 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1608         }
1609
1610         if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
1611                 if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
1612                                             is_mask, false, log) < 0)
1613                         return -EINVAL;
1614                 attrs &= ~(1 << OVS_KEY_ATTR_NSH);
1615         }
1616
1617         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
1618                 const struct ovs_key_mpls *mpls_key;
1619
1620                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
1621                 SW_FLOW_KEY_PUT(match, mpls.top_lse,
1622                                 mpls_key->mpls_lse, is_mask);
1623
1624                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
1625         }
1626
1627         if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1628                 const struct ovs_key_tcp *tcp_key;
1629
1630                 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1631                 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
1632                 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
1633                 attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1634         }
1635
1636         if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
1637                 SW_FLOW_KEY_PUT(match, tp.flags,
1638                                 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
1639                                 is_mask);
1640                 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
1641         }
1642
1643         if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1644                 const struct ovs_key_udp *udp_key;
1645
1646                 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1647                 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
1648                 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
1649                 attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1650         }
1651
1652         if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1653                 const struct ovs_key_sctp *sctp_key;
1654
1655                 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1656                 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
1657                 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
1658                 attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1659         }
1660
1661         if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1662                 const struct ovs_key_icmp *icmp_key;
1663
1664                 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1665                 SW_FLOW_KEY_PUT(match, tp.src,
1666                                 htons(icmp_key->icmp_type), is_mask);
1667                 SW_FLOW_KEY_PUT(match, tp.dst,
1668                                 htons(icmp_key->icmp_code), is_mask);
1669                 attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1670         }
1671
1672         if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1673                 const struct ovs_key_icmpv6 *icmpv6_key;
1674
1675                 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1676                 SW_FLOW_KEY_PUT(match, tp.src,
1677                                 htons(icmpv6_key->icmpv6_type), is_mask);
1678                 SW_FLOW_KEY_PUT(match, tp.dst,
1679                                 htons(icmpv6_key->icmpv6_code), is_mask);
1680                 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1681         }
1682
1683         if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1684                 const struct ovs_key_nd *nd_key;
1685
1686                 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1687                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1688                         nd_key->nd_target,
1689                         sizeof(match->key->ipv6.nd.target),
1690                         is_mask);
1691                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1692                         nd_key->nd_sll, ETH_ALEN, is_mask);
1693                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1694                                 nd_key->nd_tll, ETH_ALEN, is_mask);
1695                 attrs &= ~(1 << OVS_KEY_ATTR_ND);
1696         }
1697
1698         if (attrs != 0) {
1699                 OVS_NLERR(log, "Unknown key attributes %llx",
1700                           (unsigned long long)attrs);
1701                 return -EINVAL;
1702         }
1703
1704         return 0;
1705 }
1706
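     /* Overwrite the payload of every attribute in a validated nested stream
      * with 'val', recursing into nested attributes and clamping
      * OVS_KEY_ATTR_CT_STATE to the supported flags.  Used to turn a copy of
      * the key attributes into an exact-match (all-ones) mask.
      */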
1707 static void nlattr_set(struct nlattr *attr, u8 val,
1708                        const struct ovs_len_tbl *tbl)
1709 {
1710         struct nlattr *nla;
1711         int rem;
1712
1713         /* The nlattr stream should already have been validated */
1714         nla_for_each_nested(nla, attr, rem) {
1715                 if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
1716                         if (tbl[nla_type(nla)].next)
1717                                 tbl = tbl[nla_type(nla)].next;
1718                         nlattr_set(nla, val, tbl);
1719                 } else {
1720                         memset(nla_data(nla), val, nla_len(nla));
1721                 }
1722
1723                 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1724                         *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1725         }
1726 }
1727
1728 static void mask_set_nlattr(struct nlattr *attr, u8 val)
1729 {
1730         nlattr_set(attr, val, ovs_key_lens);
1731 }
1732
1733 /**
1734  * ovs_nla_get_match - parses Netlink attributes into a flow key and
1735  * mask. In case the 'mask' is NULL, the flow is treated as an exact match
1736  * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1737  * does not include any don't care bit.
1738  * @net: Used to determine per-namespace field support.
1739  * @match: Receives the extracted flow match information.
1740  * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1741  * sequence. The fields should be those of the packet that triggered the
1742  * creation of this flow.
1743  * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1744  * attribute sequence specifying the mask of the wildcarded flow.
1745  * @log: Boolean to allow kernel error logging.  Normally true, but when
1746  * probing for feature compatibility this should be passed in as false to
1747  * suppress unnecessary error logging.
1748  */
1749 int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1750                       const struct nlattr *nla_key,
1751                       const struct nlattr *nla_mask,
1752                       bool log)
1753 {
1754         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1755         struct nlattr *newmask = NULL;
1756         u64 key_attrs = 0;
1757         u64 mask_attrs = 0;
1758         int err;
1759
1760         err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
1761         if (err)
1762                 return err;
1763
1764         err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
1765         if (err)
1766                 return err;
1767
1768         err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
1769         if (err)
1770                 return err;
1771
1772         if (match->mask) {
1773                 if (!nla_mask) {
1774                         /* Create an exact match mask. We need to set to 0xff
1775                          * all the 'match->mask' fields that have been touched
1776                          * in 'match->key'. We cannot simply memset
1777                          * 'match->mask', because padding bytes and fields not
1778                          * specified in 'match->key' should be left to 0.
1779                          * Instead, we use a stream of netlink attributes,
1780                          * copied from 'key' and set to 0xff.
1781                          * ovs_key_from_nlattrs() will take care of filling
1782                          * 'match->mask' appropriately.
1783                          */
1784                         newmask = kmemdup(nla_key,
1785                                           nla_total_size(nla_len(nla_key)),
1786                                           GFP_KERNEL);
1787                         if (!newmask)
1788                                 return -ENOMEM;
1789
1790                         mask_set_nlattr(newmask, 0xff);
1791
1792                         /* Userspace does not send tunnel attributes that
1793                          * are 0, but we should not wildcard them nonetheless.
1794                          */
1795                         if (match->key->tun_proto)
1796                                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1797                                                          0xff, true);
1798
1799                         nla_mask = newmask;
1800                 }
1801
1802                 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1803                 if (err)
1804                         goto free_newmask;
1805
1806                 /* Always match on tci. */
1807                 SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
1808                 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
1809
1810                 err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
1811                 if (err)
1812                         goto free_newmask;
1813
1814                 err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
1815                                            log);
1816                 if (err)
1817                         goto free_newmask;
1818         }
1819
1820         if (!match_validate(match, key_attrs, mask_attrs, log))
1821                 err = -EINVAL;
1822
1823 free_newmask:
1824         kfree(newmask);
1825         return err;
1826 }
1827
1828 static size_t get_ufid_len(const struct nlattr *attr, bool log)
1829 {
1830         size_t len;
1831
1832         if (!attr)
1833                 return 0;
1834
1835         len = nla_len(attr);
1836         if (len < 1 || len > MAX_UFID_LENGTH) {
1837                 OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
1838                           nla_len(attr), MAX_UFID_LENGTH);
1839                 return 0;
1840         }
1841
1842         return len;
1843 }
1844
1845 /* Initializes the flow identifier 'sfid', returning true if 'attr' contains
1846  * a valid UFID, or false otherwise.
1847  */
1848 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1849                       bool log)
1850 {
1851         sfid->ufid_len = get_ufid_len(attr, log);
1852         if (sfid->ufid_len)
1853                 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1854
1855         return sfid->ufid_len;
1856 }
1857
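     /* Initialize 'sfid' from the UFID attribute if one was supplied, otherwise
      * fall back to storing a copy of the unmasked flow key.
      */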
1858 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1859                            const struct sw_flow_key *key, bool log)
1860 {
1861         struct sw_flow_key *new_key;
1862
1863         if (ovs_nla_get_ufid(sfid, ufid, log))
1864                 return 0;
1865
1866         /* If UFID was not provided, use unmasked key. */
1867         new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1868         if (!new_key)
1869                 return -ENOMEM;
1870         memcpy(new_key, key, sizeof(*key));
1871         sfid->unmasked_key = new_key;
1872
1873         return 0;
1874 }
1875
1876 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1877 {
1878         return attr ? nla_get_u32(attr) : 0;
1879 }
1880
1881 /**
1882  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1883  * @net: Network namespace.
1884  * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
1885  * metadata.
1886  * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
1887  * attributes.
1888  * @attrs: Bit mask for the netlink attributes included in @a.
1889  * @log: Boolean to allow kernel error logging.  Normally true, but when
1890  * probing for feature compatibility this should be passed in as false to
1891  * suppress unnecessary error logging.
1892  *
1893  * This parses a series of Netlink attributes that form a flow key, which must
1894  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1895  * get the metadata, that is, the parts of the flow key that cannot be
1896  * extracted from the packet itself.
1897  *
1898  * This must be called before the packet key fields are filled in 'key'.
1899  */
1900
1901 int ovs_nla_get_flow_metadata(struct net *net,
1902                               const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
1903                               u64 attrs, struct sw_flow_key *key, bool log)
1904 {
1905         struct sw_flow_match match;
1906
1907         memset(&match, 0, sizeof(match));
1908         match.key = key;
1909
1910         key->ct_state = 0;
1911         key->ct_zone = 0;
1912         key->ct_orig_proto = 0;
1913         memset(&key->ct, 0, sizeof(key->ct));
1914         memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
1915         memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
1916
1917         key->phy.in_port = DP_MAX_PORTS;
1918
1919         return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1920 }
1921
1922 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1923                             bool is_mask)
1924 {
1925         __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1926
1927         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1928             nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1929                 return -EMSGSIZE;
1930         return 0;
1931 }
1932
1933 static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1934                              struct sk_buff *skb)
1935 {
1936         struct nlattr *start;
1937
1938         start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
1939         if (!start)
1940                 return -EMSGSIZE;
1941
1942         if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1943                 goto nla_put_failure;
1944
1945         if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1946                 if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1947                             sizeof(nsh->context), nsh->context))
1948                         goto nla_put_failure;
1949         }
1950
1951         /* Don't support MD type 2 yet */
1952
1953         nla_nest_end(skb, start);
1954
1955         return 0;
1956
1957 nla_put_failure:
1958         return -EMSGSIZE;
1959 }
1960
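     /* Translate a flow key into OVS_KEY_ATTR_* netlink attributes on 'skb'.
      * 'swkey' selects which attributes are relevant while 'output' supplies
      * the values; when 'is_mask' is set, 'output' is the mask rather than
      * the key.
      */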
1961 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1962                              const struct sw_flow_key *output, bool is_mask,
1963                              struct sk_buff *skb)
1964 {
1965         struct ovs_key_ethernet *eth_key;
1966         struct nlattr *nla;
1967         struct nlattr *encap = NULL;
1968         struct nlattr *in_encap = NULL;
1969
1970         if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1971                 goto nla_put_failure;
1972
1973         if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1974                 goto nla_put_failure;
1975
1976         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1977                 goto nla_put_failure;
1978
1979         if ((swkey->tun_proto || is_mask)) {
1980                 const void *opts = NULL;
1981
1982                 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1983                         opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
1984
1985                 if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
1986                                      swkey->tun_opts_len, swkey->tun_proto))
1987                         goto nla_put_failure;
1988         }
1989
1990         if (swkey->phy.in_port == DP_MAX_PORTS) {
1991                 if (is_mask && (output->phy.in_port == 0xffff))
1992                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1993                                 goto nla_put_failure;
1994         } else {
1995                 u16 upper_u16;
1996                 upper_u16 = !is_mask ? 0 : 0xffff;
1997
1998                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1999                                 (upper_u16 << 16) | output->phy.in_port))
2000                         goto nla_put_failure;
2001         }
2002
2003         if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
2004                 goto nla_put_failure;
2005
2006         if (ovs_ct_put_key(swkey, output, skb))
2007                 goto nla_put_failure;
2008
2009         if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
2010                 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
2011                 if (!nla)
2012                         goto nla_put_failure;
2013
2014                 eth_key = nla_data(nla);
2015                 ether_addr_copy(eth_key->eth_src, output->eth.src);
2016                 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
2017
2018                 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
2019                         if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
2020                                 goto nla_put_failure;
2021                         encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2022                         if (!swkey->eth.vlan.tci)
2023                                 goto unencap;
2024
2025                         if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
2026                                 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
2027                                         goto nla_put_failure;
2028                                 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2029                                 if (!swkey->eth.cvlan.tci)
2030                                         goto unencap;
2031                         }
2032                 }
2033
2034                 if (swkey->eth.type == htons(ETH_P_802_2)) {
2035                         /*
2036                          * Ethertype 802.2 is represented in netlink with an
2037                          * omitted OVS_KEY_ATTR_ETHERTYPE in the flow key
2038                          * attribute, and 0xffff in the mask attribute.
2039                          * Ethertype can also be wildcarded.
2040                          */
2041                         if (is_mask && output->eth.type)
2042                                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
2043                                                         output->eth.type))
2044                                         goto nla_put_failure;
2045                         goto unencap;
2046                 }
2047         }
2048
2049         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
2050                 goto nla_put_failure;
2051
2052         if (eth_type_vlan(swkey->eth.type)) {
2053                 /* There are 3 VLAN tags; we don't know anything about the rest
2054                  * of the packet, so truncate here.
2055                  */
2056                 WARN_ON_ONCE(!(encap && in_encap));
2057                 goto unencap;
2058         }
2059
2060         if (swkey->eth.type == htons(ETH_P_IP)) {
2061                 struct ovs_key_ipv4 *ipv4_key;
2062
2063                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
2064                 if (!nla)
2065                         goto nla_put_failure;
2066                 ipv4_key = nla_data(nla);
2067                 ipv4_key->ipv4_src = output->ipv4.addr.src;
2068                 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
2069                 ipv4_key->ipv4_proto = output->ip.proto;
2070                 ipv4_key->ipv4_tos = output->ip.tos;
2071                 ipv4_key->ipv4_ttl = output->ip.ttl;
2072                 ipv4_key->ipv4_frag = output->ip.frag;
2073         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
2074                 struct ovs_key_ipv6 *ipv6_key;
2075
2076                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
2077                 if (!nla)
2078                         goto nla_put_failure;
2079                 ipv6_key = nla_data(nla);
2080                 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
2081                                 sizeof(ipv6_key->ipv6_src));
2082                 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
2083                                 sizeof(ipv6_key->ipv6_dst));
2084                 ipv6_key->ipv6_label = output->ipv6.label;
2085                 ipv6_key->ipv6_proto = output->ip.proto;
2086                 ipv6_key->ipv6_tclass = output->ip.tos;
2087                 ipv6_key->ipv6_hlimit = output->ip.ttl;
2088                 ipv6_key->ipv6_frag = output->ip.frag;
2089         } else if (swkey->eth.type == htons(ETH_P_NSH)) {
2090                 if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2091                         goto nla_put_failure;
2092         } else if (swkey->eth.type == htons(ETH_P_ARP) ||
2093                    swkey->eth.type == htons(ETH_P_RARP)) {
2094                 struct ovs_key_arp *arp_key;
2095
2096                 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
2097                 if (!nla)
2098                         goto nla_put_failure;
2099                 arp_key = nla_data(nla);
2100                 memset(arp_key, 0, sizeof(struct ovs_key_arp));
2101                 arp_key->arp_sip = output->ipv4.addr.src;
2102                 arp_key->arp_tip = output->ipv4.addr.dst;
2103                 arp_key->arp_op = htons(output->ip.proto);
2104                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
2105                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
2106         } else if (eth_p_mpls(swkey->eth.type)) {
2107                 struct ovs_key_mpls *mpls_key;
2108
2109                 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
2110                 if (!nla)
2111                         goto nla_put_failure;
2112                 mpls_key = nla_data(nla);
2113                 mpls_key->mpls_lse = output->mpls.top_lse;
2114         }
2115
2116         if ((swkey->eth.type == htons(ETH_P_IP) ||
2117              swkey->eth.type == htons(ETH_P_IPV6)) &&
2118              swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
2119
2120                 if (swkey->ip.proto == IPPROTO_TCP) {
2121                         struct ovs_key_tcp *tcp_key;
2122
2123                         nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
2124                         if (!nla)
2125                                 goto nla_put_failure;
2126                         tcp_key = nla_data(nla);
2127                         tcp_key->tcp_src = output->tp.src;
2128                         tcp_key->tcp_dst = output->tp.dst;
2129                         if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
2130                                          output->tp.flags))
2131                                 goto nla_put_failure;
2132                 } else if (swkey->ip.proto == IPPROTO_UDP) {
2133                         struct ovs_key_udp *udp_key;
2134
2135                         nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
2136                         if (!nla)
2137                                 goto nla_put_failure;
2138                         udp_key = nla_data(nla);
2139                         udp_key->udp_src = output->tp.src;
2140                         udp_key->udp_dst = output->tp.dst;
2141                 } else if (swkey->ip.proto == IPPROTO_SCTP) {
2142                         struct ovs_key_sctp *sctp_key;
2143
2144                         nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
2145                         if (!nla)
2146                                 goto nla_put_failure;
2147                         sctp_key = nla_data(nla);
2148                         sctp_key->sctp_src = output->tp.src;
2149                         sctp_key->sctp_dst = output->tp.dst;
2150                 } else if (swkey->eth.type == htons(ETH_P_IP) &&
2151                            swkey->ip.proto == IPPROTO_ICMP) {
2152                         struct ovs_key_icmp *icmp_key;
2153
2154                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
2155                         if (!nla)
2156                                 goto nla_put_failure;
2157                         icmp_key = nla_data(nla);
2158                         icmp_key->icmp_type = ntohs(output->tp.src);
2159                         icmp_key->icmp_code = ntohs(output->tp.dst);
2160                 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
2161                            swkey->ip.proto == IPPROTO_ICMPV6) {
2162                         struct ovs_key_icmpv6 *icmpv6_key;
2163
2164                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
2165                                                 sizeof(*icmpv6_key));
2166                         if (!nla)
2167                                 goto nla_put_failure;
2168                         icmpv6_key = nla_data(nla);
2169                         icmpv6_key->icmpv6_type = ntohs(output->tp.src);
2170                         icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
2171
2172                         if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
2173                             icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
2174                                 struct ovs_key_nd *nd_key;
2175
2176                                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
2177                                 if (!nla)
2178                                         goto nla_put_failure;
2179                                 nd_key = nla_data(nla);
2180                                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
2181                                                         sizeof(nd_key->nd_target));
2182                                 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
2183                                 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
2184                         }
2185                 }
2186         }
2187
2188 unencap:
2189         if (in_encap)
2190                 nla_nest_end(skb, in_encap);
2191         if (encap)
2192                 nla_nest_end(skb, encap);
2193
2194         return 0;
2195
2196 nla_put_failure:
2197         return -EMSGSIZE;
2198 }
2199
2200 int ovs_nla_put_key(const struct sw_flow_key *swkey,
2201                     const struct sw_flow_key *output, int attr, bool is_mask,
2202                     struct sk_buff *skb)
2203 {
2204         int err;
2205         struct nlattr *nla;
2206
2207         nla = nla_nest_start(skb, attr);
2208         if (!nla)
2209                 return -EMSGSIZE;
2210         err = __ovs_nla_put_key(swkey, output, is_mask, skb);
2211         if (err)
2212                 return err;
2213         nla_nest_end(skb, nla);
2214
2215         return 0;
2216 }
2217
2218 /* Called with ovs_mutex or RCU read lock. */
2219 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
2220 {
2221         if (ovs_identifier_is_ufid(&flow->id))
2222                 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
2223                                flow->id.ufid);
2224
2225         return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
2226                                OVS_FLOW_ATTR_KEY, false, skb);
2227 }
2228
2229 /* Called with ovs_mutex or RCU read lock. */
2230 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
2231 {
2232         return ovs_nla_put_key(&flow->key, &flow->key,
2233                                 OVS_FLOW_ATTR_KEY, false, skb);
2234 }
2235
2236 /* Called with ovs_mutex or RCU read lock. */
2237 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2238 {
2239         return ovs_nla_put_key(&flow->key, &flow->mask->key,
2240                                 OVS_FLOW_ATTR_MASK, true, skb);
2241 }
2242
2243 #define MAX_ACTIONS_BUFSIZE     (32 * 1024)
2244
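     /* Allocate a sw_flow_actions buffer with room for 'size' bytes of actions. */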
2245 static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2246 {
2247         struct sw_flow_actions *sfa;
2248
2249         WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2250
2251         sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2252         if (!sfa)
2253                 return ERR_PTR(-ENOMEM);
2254
2255         sfa->actions_len = 0;
2256         return sfa;
2257 }
2258
2259 static void ovs_nla_free_set_action(const struct nlattr *a)
2260 {
2261         const struct nlattr *ovs_key = nla_data(a);
2262         struct ovs_tunnel_info *ovs_tun;
2263
2264         switch (nla_type(ovs_key)) {
2265         case OVS_KEY_ATTR_TUNNEL_INFO:
2266                 ovs_tun = nla_data(ovs_key);
2267                 dst_release((struct dst_entry *)ovs_tun->tun_dst);
2268                 break;
2269         }
2270 }
2271
2272 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
2273 {
2274         const struct nlattr *a;
2275         int rem;
2276
2277         if (!sf_acts)
2278                 return;
2279
2280         nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
2281                 switch (nla_type(a)) {
2282                 case OVS_ACTION_ATTR_SET:
2283                         ovs_nla_free_set_action(a);
2284                         break;
2285                 case OVS_ACTION_ATTR_CT:
2286                         ovs_ct_free_action(a);
2287                         break;
2288                 }
2289         }
2290
2291         kfree(sf_acts);
2292 }
2293
2294 static void __ovs_nla_free_flow_actions(struct rcu_head *head)
2295 {
2296         ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
2297 }
2298
2299 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
2300  * The caller must hold rcu_read_lock for this to be sensible. */
2301 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
2302 {
2303         call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
2304 }
2305
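     /* Reserve 'attr_len' bytes at the end of the actions buffer, doubling its
      * allocation (up to MAX_ACTIONS_BUFSIZE) when necessary, and return a
      * pointer to the newly reserved space.
      */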
2306 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2307                                        int attr_len, bool log)
2308 {
2309
2310         struct sw_flow_actions *acts;
2311         int new_acts_size;
2312         int req_size = NLA_ALIGN(attr_len);
2313         int next_offset = offsetof(struct sw_flow_actions, actions) +
2314                                         (*sfa)->actions_len;
2315
2316         if (req_size <= (ksize(*sfa) - next_offset))
2317                 goto out;
2318
2319         new_acts_size = ksize(*sfa) * 2;
2320
2321         if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2322                 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
2323                         OVS_NLERR(log, "Flow action size exceeds max %u",
2324                                   MAX_ACTIONS_BUFSIZE);
2325                         return ERR_PTR(-EMSGSIZE);
2326                 }
2327                 new_acts_size = MAX_ACTIONS_BUFSIZE;
2328         }
2329
2330         acts = nla_alloc_flow_actions(new_acts_size);
2331         if (IS_ERR(acts))
2332                 return (void *)acts;
2333
2334         memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
2335         acts->actions_len = (*sfa)->actions_len;
2336         acts->orig_len = (*sfa)->orig_len;
2337         kfree(*sfa);
2338         *sfa = acts;
2339
2340 out:
2341         (*sfa)->actions_len += req_size;
2342         return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
2343 }
2344
2345 static struct nlattr *__add_action(struct sw_flow_actions **sfa,
2346                                    int attrtype, void *data, int len, bool log)
2347 {
2348         struct nlattr *a;
2349
2350         a = reserve_sfa_size(sfa, nla_attr_size(len), log);
2351         if (IS_ERR(a))
2352                 return a;
2353
2354         a->nla_type = attrtype;
2355         a->nla_len = nla_attr_size(len);
2356
2357         if (data)
2358                 memcpy(nla_data(a), data, len);
2359         memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
2360
2361         return a;
2362 }
2363
2364 int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
2365                        int len, bool log)
2366 {
2367         struct nlattr *a;
2368
2369         a = __add_action(sfa, attrtype, data, len, log);
2370
2371         return PTR_ERR_OR_ZERO(a);
2372 }
2373
2374 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
2375                                           int attrtype, bool log)
2376 {
2377         int used = (*sfa)->actions_len;
2378         int err;
2379
2380         err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
2381         if (err)
2382                 return err;
2383
2384         return used;
2385 }
2386
2387 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
2388                                          int st_offset)
2389 {
2390         struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
2391                                                                st_offset);
2392
2393         a->nla_len = sfa->actions_len - st_offset;
2394 }
2395
2396 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2397                                   const struct sw_flow_key *key,
2398                                   struct sw_flow_actions **sfa,
2399                                   __be16 eth_type, __be16 vlan_tci, bool log);
2400
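     /* Validate an OVS_ACTION_ATTR_SAMPLE action and copy it into 'sfa',
      * converting the probability into a struct sample_arg and recursively
      * copying the nested actions.
      */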
2401 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
2402                                     const struct sw_flow_key *key,
2403                                     struct sw_flow_actions **sfa,
2404                                     __be16 eth_type, __be16 vlan_tci,
2405                                     bool log, bool last)
2406 {
2407         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
2408         const struct nlattr *probability, *actions;
2409         const struct nlattr *a;
2410         int rem, start, err;
2411         struct sample_arg arg;
2412
2413         memset(attrs, 0, sizeof(attrs));
2414         nla_for_each_nested(a, attr, rem) {
2415                 int type = nla_type(a);
2416                 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
2417                         return -EINVAL;
2418                 attrs[type] = a;
2419         }
2420         if (rem)
2421                 return -EINVAL;
2422
2423         probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
2424         if (!probability || nla_len(probability) != sizeof(u32))
2425                 return -EINVAL;
2426
2427         actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
2428         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
2429                 return -EINVAL;
2430
2431         /* validation done, copy sample action. */
2432         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
2433         if (start < 0)
2434                 return start;
2435
2436         /* When both skb and flow may be changed, put the sample
2437          * into a deferred fifo. On the other hand, if only skb
2438          * may be modified, the actions can be executed in place.
2439          *
2440          * Do this analysis at the flow installation time.
2441          * Set 'clone_action->exec' to true if the actions can be
2442          * executed without being deferred.
2443          *
2444          * If the sample is the last action, it can always be executed
2445          * rather than deferred.
2446          */
2447         arg.exec = last || !actions_may_change_flow(actions);
2448         arg.probability = nla_get_u32(probability);
2449
2450         err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
2451                                  log);
2452         if (err)
2453                 return err;
2454
2455         err = __ovs_nla_copy_actions(net, actions, key, sfa,
2456                                      eth_type, vlan_tci, log);
2457
2458         if (err)
2459                 return err;
2460
2461         add_nested_action_end(*sfa, start);
2462
2463         return 0;
2464 }
2465
2466 void ovs_match_init(struct sw_flow_match *match,
2467                     struct sw_flow_key *key,
2468                     bool reset_key,
2469                     struct sw_flow_mask *mask)
2470 {
2471         memset(match, 0, sizeof(*match));
2472         match->key = key;
2473         match->mask = mask;
2474
2475         if (reset_key)
2476                 memset(key, 0, sizeof(*key));
2477
2478         if (mask) {
2479                 memset(&mask->key, 0, sizeof(mask->key));
2480                 mask->range.start = mask->range.end = 0;
2481         }
2482 }
2483
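     /* Walk the geneve options carried in the tunnel key and set
      * TUNNEL_CRIT_OPT in the tunnel flags if any critical option is present.
      */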
2484 static int validate_geneve_opts(struct sw_flow_key *key)
2485 {
2486         struct geneve_opt *option;
2487         int opts_len = key->tun_opts_len;
2488         bool crit_opt = false;
2489
2490         option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2491         while (opts_len > 0) {
2492                 int len;
2493
2494                 if (opts_len < sizeof(*option))
2495                         return -EINVAL;
2496
2497                 len = sizeof(*option) + option->length * 4;
2498                 if (len > opts_len)
2499                         return -EINVAL;
2500
2501                 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2502
2503                 option = (struct geneve_opt *)((u8 *)option + len);
2504                 opts_len -= len;
2505         }
2506
2507         key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2508
2509         return 0;
2510 }
2511
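     /* Validate a set(tunnel(...)) action and copy it into 'sfa' as an
      * OVS_ACTION_ATTR_SET containing an OVS_KEY_ATTR_TUNNEL_INFO attribute
      * that holds a preconstructed metadata destination.
      */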
2512 static int validate_and_copy_set_tun(const struct nlattr *attr,
2513                                      struct sw_flow_actions **sfa, bool log)
2514 {
2515         struct sw_flow_match match;
2516         struct sw_flow_key key;
2517         struct metadata_dst *tun_dst;
2518         struct ip_tunnel_info *tun_info;
2519         struct ovs_tunnel_info *ovs_tun;
2520         struct nlattr *a;
2521         int err = 0, start, opts_type;
2522
2523         ovs_match_init(&match, &key, true, NULL);
2524         opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
2525         if (opts_type < 0)
2526                 return opts_type;
2527
2528         if (key.tun_opts_len) {
2529                 switch (opts_type) {
2530                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
2531                         err = validate_geneve_opts(&key);
2532                         if (err < 0)
2533                                 return err;
2534                         break;
2535                 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2536                         break;
2537                 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2538                         break;
2539                 }
2540         }
2541
2542         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2543         if (start < 0)
2544                 return start;
2545
2546         tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
2547                                      GFP_KERNEL);
2548
2549         if (!tun_dst)
2550                 return -ENOMEM;
2551
2552         err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
2553         if (err) {
2554                 dst_release((struct dst_entry *)tun_dst);
2555                 return err;
2556         }
2557
2558         a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
2559                          sizeof(*ovs_tun), log);
2560         if (IS_ERR(a)) {
2561                 dst_release((struct dst_entry *)tun_dst);
2562                 return PTR_ERR(a);
2563         }
2564
2565         ovs_tun = nla_data(a);
2566         ovs_tun->tun_dst = tun_dst;
2567
2568         tun_info = &tun_dst->u.tun_info;
2569         tun_info->mode = IP_TUNNEL_INFO_TX;
2570         if (key.tun_proto == AF_INET6)
2571                 tun_info->mode |= IP_TUNNEL_INFO_IPV6;
2572         tun_info->key = key.tun_key;
2573
2574         /* We need to store the options in the action itself since
2575          * everything else will go away after flow setup. We can append
2576          * it to tun_info and then point there.
2577          */
2578         ip_tunnel_info_opts_set(tun_info,
2579                                 TUN_METADATA_OPTS(&key, key.tun_opts_len),
2580                                 key.tun_opts_len);
2581         add_nested_action_end(*sfa, start);
2582
2583         return err;
2584 }
2585
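/* Parse an NSH key or mask into a scratch flow match purely for validation;
 * returns true if nsh_key_put_from_nlattr() accepts the attribute.
 */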
2586 static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2587                          bool is_push_nsh, bool log)
2588 {
2589         struct sw_flow_match match;
2590         struct sw_flow_key key;
2591         int ret = 0;
2592
2593         ovs_match_init(&match, &key, true, NULL);
2594         ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2595                                       is_push_nsh, log);
2596         return !ret;
2597 }
2598
2599 /* Return false if there are any non-masked bits set.
2600  * Mask follows data immediately, before any netlink padding.
2601  */
2602 static bool validate_masked(u8 *data, int len)
2603 {
2604         u8 *mask = data + len;
2605
2606         while (len--)
2607                 if (*data++ & ~*mask++)
2608                         return false;
2609
2610         return true;
2611 }
2612
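/* Validate one OVS_ACTION_ATTR_SET or OVS_ACTION_ATTR_SET_MASKED action
 * against the flow key and the current ethertype.  Tunnel sets are copied
 * separately via validate_and_copy_set_tun(); other non-masked sets are
 * rewritten as OVS_ACTION_ATTR_SET_TO_MASKED with an all-ones mask.  In
 * both cases *skip_copy tells the caller not to copy the original action.
 */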
2613 static int validate_set(const struct nlattr *a,
2614                         const struct sw_flow_key *flow_key,
2615                         struct sw_flow_actions **sfa, bool *skip_copy,
2616                         u8 mac_proto, __be16 eth_type, bool masked, bool log)
2617 {
2618         const struct nlattr *ovs_key = nla_data(a);
2619         int key_type = nla_type(ovs_key);
2620         size_t key_len;
2621
2622         /* There can be only one key in an action. */
2623         if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
2624                 return -EINVAL;
2625
2626         key_len = nla_len(ovs_key);
2627         if (masked)
2628                 key_len /= 2;
2629
2630         if (key_type > OVS_KEY_ATTR_MAX ||
2631             !check_attr_len(key_len, ovs_key_lens[key_type].len))
2632                 return -EINVAL;
2633
2634         if (masked && !validate_masked(nla_data(ovs_key), key_len))
2635                 return -EINVAL;
2636
2637         switch (key_type) {
2638         const struct ovs_key_ipv4 *ipv4_key;
2639         const struct ovs_key_ipv6 *ipv6_key;
2640         int err;
2641
2642         case OVS_KEY_ATTR_PRIORITY:
2643         case OVS_KEY_ATTR_SKB_MARK:
2644         case OVS_KEY_ATTR_CT_MARK:
2645         case OVS_KEY_ATTR_CT_LABELS:
2646                 break;
2647
2648         case OVS_KEY_ATTR_ETHERNET:
2649                 if (mac_proto != MAC_PROTO_ETHERNET)
2650                         return -EINVAL;
2651                 break;
2652
2653         case OVS_KEY_ATTR_TUNNEL:
2654                 if (masked)
2655                         return -EINVAL; /* Masked tunnel set not supported. */
2656
2657                 *skip_copy = true;
2658                 err = validate_and_copy_set_tun(a, sfa, log);
2659                 if (err)
2660                         return err;
2661                 break;
2662
2663         case OVS_KEY_ATTR_IPV4:
2664                 if (eth_type != htons(ETH_P_IP))
2665                         return -EINVAL;
2666
2667                 ipv4_key = nla_data(ovs_key);
2668
2669                 if (masked) {
2670                         const struct ovs_key_ipv4 *mask = ipv4_key + 1;
2671
2672                         /* Non-writeable fields. */
2673                         if (mask->ipv4_proto || mask->ipv4_frag)
2674                                 return -EINVAL;
2675                 } else {
2676                         if (ipv4_key->ipv4_proto != flow_key->ip.proto)
2677                                 return -EINVAL;
2678
2679                         if (ipv4_key->ipv4_frag != flow_key->ip.frag)
2680                                 return -EINVAL;
2681                 }
2682                 break;
2683
2684         case OVS_KEY_ATTR_IPV6:
2685                 if (eth_type != htons(ETH_P_IPV6))
2686                         return -EINVAL;
2687
2688                 ipv6_key = nla_data(ovs_key);
2689
2690                 if (masked) {
2691                         const struct ovs_key_ipv6 *mask = ipv6_key + 1;
2692
2693                         /* Non-writeable fields. */
2694                         if (mask->ipv6_proto || mask->ipv6_frag)
2695                                 return -EINVAL;
2696
2697                         /* Invalid bits in the flow label mask? */
2698                         if (ntohl(mask->ipv6_label) & 0xFFF00000)
2699                                 return -EINVAL;
2700                 } else {
2701                         if (ipv6_key->ipv6_proto != flow_key->ip.proto)
2702                                 return -EINVAL;
2703
2704                         if (ipv6_key->ipv6_frag != flow_key->ip.frag)
2705                                 return -EINVAL;
2706                 }
2707                 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
2708                         return -EINVAL;
2709
2710                 break;
2711
2712         case OVS_KEY_ATTR_TCP:
2713                 if ((eth_type != htons(ETH_P_IP) &&
2714                      eth_type != htons(ETH_P_IPV6)) ||
2715                     flow_key->ip.proto != IPPROTO_TCP)
2716                         return -EINVAL;
2717
2718                 break;
2719
2720         case OVS_KEY_ATTR_UDP:
2721                 if ((eth_type != htons(ETH_P_IP) &&
2722                      eth_type != htons(ETH_P_IPV6)) ||
2723                     flow_key->ip.proto != IPPROTO_UDP)
2724                         return -EINVAL;
2725
2726                 break;
2727
2728         case OVS_KEY_ATTR_MPLS:
2729                 if (!eth_p_mpls(eth_type))
2730                         return -EINVAL;
2731                 break;
2732
2733         case OVS_KEY_ATTR_SCTP:
2734                 if ((eth_type != htons(ETH_P_IP) &&
2735                      eth_type != htons(ETH_P_IPV6)) ||
2736                     flow_key->ip.proto != IPPROTO_SCTP)
2737                         return -EINVAL;
2738
2739                 break;
2740
2741         case OVS_KEY_ATTR_NSH:
2742                 if (eth_type != htons(ETH_P_NSH))
2743                         return -EINVAL;
2744                 if (!validate_nsh(nla_data(a), masked, false, log))
2745                         return -EINVAL;
2746                 break;
2747
2748         default:
2749                 return -EINVAL;
2750         }
2751
2752         /* Convert non-masked non-tunnel set actions to masked set actions. */
2753         if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
2754                 int start, len = key_len * 2;
2755                 struct nlattr *at;
2756
2757                 *skip_copy = true;
2758
2759                 start = add_nested_action_start(sfa,
2760                                                 OVS_ACTION_ATTR_SET_TO_MASKED,
2761                                                 log);
2762                 if (start < 0)
2763                         return start;
2764
2765                 at = __add_action(sfa, key_type, NULL, len, log);
2766                 if (IS_ERR(at))
2767                         return PTR_ERR(at);
2768
2769                 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
2770                 memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
2771                 /* Clear non-writeable bits from otherwise writeable fields. */
2772                 if (key_type == OVS_KEY_ATTR_IPV6) {
2773                         struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
2774
2775                         mask->ipv6_label &= htonl(0x000FFFFF);
2776                 }
2777                 add_nested_action_end(*sfa, start);
2778         }
2779
2780         return 0;
2781 }
2782
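/* An OVS_ACTION_ATTR_USERSPACE action must parse against userspace_policy
 * and carry a non-zero OVS_USERSPACE_ATTR_PID to deliver the packet to.
 */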
2783 static int validate_userspace(const struct nlattr *attr)
2784 {
2785         static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
2786                 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
2787                 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
2788                 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
2789         };
2790         struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
2791         int error;
2792
2793         error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
2794                                  userspace_policy, NULL);
2795         if (error)
2796                 return error;
2797
2798         if (!a[OVS_USERSPACE_ATTR_PID] ||
2799             !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
2800                 return -EINVAL;
2801
2802         return 0;
2803 }
2804
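/* Append a verbatim, netlink-aligned copy of @from to the flow's action
 * buffer.
 */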
2805 static int copy_action(const struct nlattr *from,
2806                        struct sw_flow_actions **sfa, bool log)
2807 {
2808         int totlen = NLA_ALIGN(from->nla_len);
2809         struct nlattr *to;
2810
2811         to = reserve_sfa_size(sfa, from->nla_len, log);
2812         if (IS_ERR(to))
2813                 return PTR_ERR(to);
2814
2815         memcpy(to, from, totlen);
2816         return 0;
2817 }
2818
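/* Validate the nested list of actions in @attr against the flow @key and
 * copy it into @sfa.  eth_type and vlan_tci are updated as push/pop actions
 * are encountered, so each subsequent action is checked against the headers
 * the packet will carry at that point in the list.
 */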
2819 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2820                                   const struct sw_flow_key *key,
2821                                   struct sw_flow_actions **sfa,
2822                                   __be16 eth_type, __be16 vlan_tci, bool log)
2823 {
2824         u8 mac_proto = ovs_key_mac_proto(key);
2825         const struct nlattr *a;
2826         int rem, err;
2827
2828         nla_for_each_nested(a, attr, rem) {
2829                 /* Expected argument lengths, (u32)-1 for variable length. */
2830                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
2831                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
2832                         [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
2833                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
2834                         [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
2835                         [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
2836                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
2837                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
2838                         [OVS_ACTION_ATTR_SET] = (u32)-1,
2839                         [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2840                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2841                         [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2842                         [OVS_ACTION_ATTR_CT] = (u32)-1,
2843                         [OVS_ACTION_ATTR_CT_CLEAR] = 0,
2844                         [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2845                         [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2846                         [OVS_ACTION_ATTR_POP_ETH] = 0,
2847                         [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
2848                         [OVS_ACTION_ATTR_POP_NSH] = 0,
2849                         [OVS_ACTION_ATTR_METER] = sizeof(u32),
2850                 };
2851                 const struct ovs_action_push_vlan *vlan;
2852                 int type = nla_type(a);
2853                 bool skip_copy;
2854
2855                 if (type > OVS_ACTION_ATTR_MAX ||
2856                     (action_lens[type] != nla_len(a) &&
2857                      action_lens[type] != (u32)-1))
2858                         return -EINVAL;
2859
2860                 skip_copy = false;
2861                 switch (type) {
2862                 case OVS_ACTION_ATTR_UNSPEC:
2863                         return -EINVAL;
2864
2865                 case OVS_ACTION_ATTR_USERSPACE:
2866                         err = validate_userspace(a);
2867                         if (err)
2868                                 return err;
2869                         break;
2870
2871                 case OVS_ACTION_ATTR_OUTPUT:
2872                         if (nla_get_u32(a) >= DP_MAX_PORTS)
2873                                 return -EINVAL;
2874                         break;
2875
2876                 case OVS_ACTION_ATTR_TRUNC: {
2877                         const struct ovs_action_trunc *trunc = nla_data(a);
2878
2879                         if (trunc->max_len < ETH_HLEN)
2880                                 return -EINVAL;
2881                         break;
2882                 }
2883
2884                 case OVS_ACTION_ATTR_HASH: {
2885                         const struct ovs_action_hash *act_hash = nla_data(a);
2886
2887                         switch (act_hash->hash_alg) {
2888                         case OVS_HASH_ALG_L4:
2889                                 break;
2890                         default:
2891                                 return -EINVAL;
2892                         }
2893
2894                         break;
2895                 }
2896
2897                 case OVS_ACTION_ATTR_POP_VLAN:
2898                         if (mac_proto != MAC_PROTO_ETHERNET)
2899                                 return -EINVAL;
2900                         vlan_tci = htons(0);
2901                         break;
2902
2903                 case OVS_ACTION_ATTR_PUSH_VLAN:
2904                         if (mac_proto != MAC_PROTO_ETHERNET)
2905                                 return -EINVAL;
2906                         vlan = nla_data(a);
2907                         if (!eth_type_vlan(vlan->vlan_tpid))
2908                                 return -EINVAL;
2909                         if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
2910                                 return -EINVAL;
2911                         vlan_tci = vlan->vlan_tci;
2912                         break;
2913
2914                 case OVS_ACTION_ATTR_RECIRC:
2915                         break;
2916
2917                 case OVS_ACTION_ATTR_PUSH_MPLS: {
2918                         const struct ovs_action_push_mpls *mpls = nla_data(a);
2919
2920                         if (!eth_p_mpls(mpls->mpls_ethertype))
2921                                 return -EINVAL;
2922                         /* Only allow pushing MPLS onto a whitelist of
2923                          * ethertypes for which the tag order is known.
2924                          */
2925                         if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2926                             (eth_type != htons(ETH_P_IP) &&
2927                              eth_type != htons(ETH_P_IPV6) &&
2928                              eth_type != htons(ETH_P_ARP) &&
2929                              eth_type != htons(ETH_P_RARP) &&
2930                              !eth_p_mpls(eth_type)))
2931                                 return -EINVAL;
2932                         eth_type = mpls->mpls_ethertype;
2933                         break;
2934                 }
2935
2936                 case OVS_ACTION_ATTR_POP_MPLS:
2937                         if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2938                             !eth_p_mpls(eth_type))
2939                                 return -EINVAL;
2940
2941                         /* Disallow subsequent L2.5+ set and mpls_pop actions
2942                          * as there is no check here to ensure that the new
2943                          * eth_type is valid and thus set actions could
2944                          * write off the end of the packet or otherwise
2945                          * corrupt it.
2946                          *
2947                          * Support for these actions is planned using packet
2948                          * recirculation.
2949                          */
2950                         eth_type = htons(0);
2951                         break;
2952
2953                 case OVS_ACTION_ATTR_SET:
2954                         err = validate_set(a, key, sfa,
2955                                            &skip_copy, mac_proto, eth_type,
2956                                            false, log);
2957                         if (err)
2958                                 return err;
2959                         break;
2960
2961                 case OVS_ACTION_ATTR_SET_MASKED:
2962                         err = validate_set(a, key, sfa,
2963                                            &skip_copy, mac_proto, eth_type,
2964                                            true, log);
2965                         if (err)
2966                                 return err;
2967                         break;
2968
2969                 case OVS_ACTION_ATTR_SAMPLE: {
2970                         bool last = nla_is_last(a, rem);
2971
2972                         err = validate_and_copy_sample(net, a, key, sfa,
2973                                                        eth_type, vlan_tci,
2974                                                        log, last);
2975                         if (err)
2976                                 return err;
2977                         skip_copy = true;
2978                         break;
2979                 }
2980
2981                 case OVS_ACTION_ATTR_CT:
2982                         err = ovs_ct_copy_action(net, a, key, sfa, log);
2983                         if (err)
2984                                 return err;
2985                         skip_copy = true;
2986                         break;
2987
2988                 case OVS_ACTION_ATTR_CT_CLEAR:
2989                         break;
2990
2991                 case OVS_ACTION_ATTR_PUSH_ETH:
2992                         /* Disallow pushing an Ethernet header if one
2993                          * is already present */
2994                         if (mac_proto != MAC_PROTO_NONE)
2995                                 return -EINVAL;
2996                         mac_proto = MAC_PROTO_ETHERNET;
2997                         break;
2998
2999                 case OVS_ACTION_ATTR_POP_ETH:
3000                         if (mac_proto != MAC_PROTO_ETHERNET)
3001                                 return -EINVAL;
3002                         if (vlan_tci & htons(VLAN_TAG_PRESENT))
3003                                 return -EINVAL;
3004                         mac_proto = MAC_PROTO_NONE;
3005                         break;
3006
3007                 case OVS_ACTION_ATTR_PUSH_NSH:
3008                         if (mac_proto != MAC_PROTO_ETHERNET) {
3009                                 u8 next_proto;
3010
3011                                 next_proto = tun_p_from_eth_p(eth_type);
3012                                 if (!next_proto)
3013                                         return -EINVAL;
3014                         }
3015                         mac_proto = MAC_PROTO_NONE;
3016                         if (!validate_nsh(nla_data(a), false, true, true))
3017                                 return -EINVAL;
3018                         break;
3019
3020                 case OVS_ACTION_ATTR_POP_NSH: {
3021                         __be16 inner_proto;
3022
3023                         if (eth_type != htons(ETH_P_NSH))
3024                                 return -EINVAL;
3025                         inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3026                         if (!inner_proto)
3027                                 return -EINVAL;
3028                         if (key->nsh.base.np == TUN_P_ETHERNET)
3029                                 mac_proto = MAC_PROTO_ETHERNET;
3030                         else
3031                                 mac_proto = MAC_PROTO_NONE;
3032                         break;
3033                 }
3034
3035                 case OVS_ACTION_ATTR_METER:
3036                         /* Non-existent meters are simply ignored.  */
3037                         break;
3038
3039                 default:
3040                         OVS_NLERR(log, "Unknown Action type %d", type);
3041                         return -EINVAL;
3042                 }
3043                 if (!skip_copy) {
3044                         err = copy_action(a, sfa, log);
3045                         if (err)
3046                                 return err;
3047                 }
3048         }
3049
3050         if (rem > 0)
3051                 return -EINVAL;
3052
3053         return 0;
3054 }
3055
3056 /* 'key' must be the masked key. */
3057 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3058                          const struct sw_flow_key *key,
3059                          struct sw_flow_actions **sfa, bool log)
3060 {
3061         int err;
3062
3063         *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3064         if (IS_ERR(*sfa))
3065                 return PTR_ERR(*sfa);
3066
3067         (*sfa)->orig_len = nla_len(attr);
3068         err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
3069                                      key->eth.vlan.tci, log);
3070         if (err)
3071                 ovs_nla_free_flow_actions(*sfa);
3072
3073         return err;
3074 }
3075
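/* Translate the internal sample() layout (an argument attribute holding
 * struct sample_arg, followed by the nested actions) back into the
 * OVS_ACTION_ATTR_SAMPLE netlink format with PROBABILITY and ACTIONS.
 */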
3076 static int sample_action_to_attr(const struct nlattr *attr,
3077                                  struct sk_buff *skb)
3078 {
3079         struct nlattr *start, *ac_start = NULL, *sample_arg;
3080         int err = 0, rem = nla_len(attr);
3081         const struct sample_arg *arg;
3082         struct nlattr *actions;
3083
3084         start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
3085         if (!start)
3086                 return -EMSGSIZE;
3087
3088         sample_arg = nla_data(attr);
3089         arg = nla_data(sample_arg);
3090         actions = nla_next(sample_arg, &rem);
3091
3092         if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
3093                 err = -EMSGSIZE;
3094                 goto out;
3095         }
3096
3097         ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
3098         if (!ac_start) {
3099                 err = -EMSGSIZE;
3100                 goto out;
3101         }
3102
3103         err = ovs_nla_put_actions(actions, rem, skb);
3104
3105 out:
3106         if (err) {
3107                 nla_nest_cancel(skb, ac_start);
3108                 nla_nest_cancel(skb, start);
3109         } else {
3110                 nla_nest_end(skb, ac_start);
3111                 nla_nest_end(skb, start);
3112         }
3113
3114         return err;
3115 }
3116
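/* Convert an internal set() action back to netlink form.  Tunnel sets were
 * stored as an OVS_KEY_ATTR_TUNNEL_INFO blob and are re-expanded into
 * nested tunnel attributes; all other keys are emitted unchanged.
 */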
3117 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
3118 {
3119         const struct nlattr *ovs_key = nla_data(a);
3120         int key_type = nla_type(ovs_key);
3121         struct nlattr *start;
3122         int err;
3123
3124         switch (key_type) {
3125         case OVS_KEY_ATTR_TUNNEL_INFO: {
3126                 struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
3127                 struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
3128
3129                 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3130                 if (!start)
3131                         return -EMSGSIZE;
3132
3133                 err =  ip_tun_to_nlattr(skb, &tun_info->key,
3134                                         ip_tunnel_info_opts(tun_info),
3135                                         tun_info->options_len,
3136                                         ip_tunnel_info_af(tun_info));
3137                 if (err)
3138                         return err;
3139                 nla_nest_end(skb, start);
3140                 break;
3141         }
3142         default:
3143                 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
3144                         return -EMSGSIZE;
3145                 break;
3146         }
3147
3148         return 0;
3149 }
3150
3151 static int masked_set_action_to_set_action_attr(const struct nlattr *a,
3152                                                 struct sk_buff *skb)
3153 {
3154         const struct nlattr *ovs_key = nla_data(a);
3155         struct nlattr *nla;
3156         size_t key_len = nla_len(ovs_key) / 2;
3157
3158         /* Revert the conversion we did from a non-masked set action to
3159          * masked set action.
3160          */
3161         nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3162         if (!nla)
3163                 return -EMSGSIZE;
3164
3165         if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
3166                 return -EMSGSIZE;
3167
3168         nla_nest_end(skb, nla);
3169         return 0;
3170 }
3171
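/* Serialize the internal action list back to netlink, undoing the internal
 * transformations applied to set, masked-set, sample and conntrack actions
 * when they were copied in.
 */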
3172 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
3173 {
3174         const struct nlattr *a;
3175         int rem, err;
3176
3177         nla_for_each_attr(a, attr, len, rem) {
3178                 int type = nla_type(a);
3179
3180                 switch (type) {
3181                 case OVS_ACTION_ATTR_SET:
3182                         err = set_action_to_attr(a, skb);
3183                         if (err)
3184                                 return err;
3185                         break;
3186
3187                 case OVS_ACTION_ATTR_SET_TO_MASKED:
3188                         err = masked_set_action_to_set_action_attr(a, skb);
3189                         if (err)
3190                                 return err;
3191                         break;
3192
3193                 case OVS_ACTION_ATTR_SAMPLE:
3194                         err = sample_action_to_attr(a, skb);
3195                         if (err)
3196                                 return err;
3197                         break;
3198
3199                 case OVS_ACTION_ATTR_CT:
3200                         err = ovs_ct_action_to_attr(nla_data(a), skb);
3201                         if (err)
3202                                 return err;
3203                         break;
3204
3205                 default:
3206                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
3207                                 return -EMSGSIZE;
3208                         break;
3209                 }
3210         }
3211
3212         return 0;
3213 }