• source navigation  • diff markup  • identifier search  • freetext search  • 

Sources/qosify/qosify-bpf.c

  1 // SPDX-License-Identifier: GPL-2.0+
  2 /*
  3  * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
  4  */
  5 #define KBUILD_MODNAME "foo"
  6 #include <uapi/linux/bpf.h>
  7 #include <uapi/linux/if_ether.h>
  8 #include <uapi/linux/if_packet.h>
  9 #include <uapi/linux/ip.h>
 10 #include <uapi/linux/ipv6.h>
 11 #include <uapi/linux/in.h>
 12 #include <uapi/linux/tcp.h>
 13 #include <uapi/linux/udp.h>
 14 #include <uapi/linux/filter.h>
 15 #include <uapi/linux/pkt_cls.h>
 16 #include <linux/ip.h>
 17 #include <net/ipv6.h>
 18 #include <bpf/bpf_helpers.h>
 19 #include <bpf/bpf_endian.h>
 20 #include "qosify-bpf.h"
 21 
/* ECN bits of the IP dsfield; preserved when the DSCP bits are rewritten */
#define INET_ECN_MASK 3

/* Time units below are bpf_ktime_get_ns() >> 24, i.e. ~16.8 ms per tick
 * (see cur_time()) */
#define FLOW_CHECK_INTERVAL     ((u32)((1000000000ULL) >> 24))
#define FLOW_TIMEOUT            ((u32)((30ULL * 1000000000ULL) >> 24))
#define FLOW_BULK_TIMEOUT       5

/* fixed-point scaling shift used by ewma() */
#define EWMA_SHIFT              12

/* rodata flags (QOSIFY_INGRESS, QOSIFY_IP_ONLY, ...); const volatile so the
 * loader can patch the value before the program is attached */
const volatile static uint32_t module_flags = 0;

/* Per-flow state stored in flow_map, keyed by the skb flow hash */
struct flow_bucket {
        __u32 last_update;      /* cur_time() of the last interval reset/rollover */
        __u32 pkt_len_avg;      /* EWMA of pkt length, scaled by EWMA_SHIFT */
        __u32 pkt_count;        /* packets counted in the current interval */
        __u32 bulk_timeout;     /* intervals the flow remains marked as bulk */
};
 38 
/* Singleton (key 0) global configuration, shared with userspace */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct qosify_config);
        __uint(max_entries, 1);
} config SEC(".maps");

/* DSCP value per TCP port.
 * NOTE(review): parse_l4proto() indexes this with the raw (network byte
 * order) port — the userspace loader must fill it the same way. */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, __u8);
        __uint(max_entries, 1 << 16);
} tcp_ports SEC(".maps");

/* DSCP value per UDP port; entry 0 also serves as the fallback for
 * protocols other than TCP/UDP (see parse_l4proto()) */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, __u8);
        __uint(max_entries, 1 << 16);
} udp_ports SEC(".maps");

/* Per-flow buckets keyed by skb flow hash; LRU so idle flows age out */
struct {
        __uint(type, BPF_MAP_TYPE_LRU_HASH);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct flow_bucket);
        __uint(max_entries, QOSIFY_FLOW_BUCKETS);
} flow_map SEC(".maps");

/* Per-IPv4-address DSCP override (key_size used because the key is an
 * address struct rather than a plain integer) */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(pinning, 1);
        __uint(key_size, sizeof(struct in_addr));
        __type(value, struct qosify_ip_map_val);
        __uint(max_entries, 100000);
        __uint(map_flags, BPF_F_NO_PREALLOC);
} ipv4_map SEC(".maps");

/* Per-IPv6-address DSCP override */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(pinning, 1);
        __uint(key_size, sizeof(struct in6_addr));
        __type(value, struct qosify_ip_map_val);
        __uint(max_entries, 100000);
        __uint(map_flags, BPF_F_NO_PREALLOC);
} ipv6_map SEC(".maps");

/* Class table resolved by dscp_lookup_class() for DSCP values carrying
 * QOSIFY_DSCP_CLASS_FLAG */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct qosify_class);
        __uint(max_entries, QOSIFY_MAX_CLASS_ENTRIES +
                            QOSIFY_DEFAULT_CLASS_ENTRIES);
} class_map SEC(".maps");
 97 
 98 static struct qosify_config *get_config(void)
 99 {
100         __u32 key = 0;
101 
102         return bpf_map_lookup_elem(&config, &key);
103 }
104 
105 static __always_inline int proto_is_vlan(__u16 h_proto)
106 {
107         return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
108                   h_proto == bpf_htons(ETH_P_8021AD));
109 }
110 
111 static __always_inline int proto_is_ip(__u16 h_proto)
112 {
113         return !!(h_proto == bpf_htons(ETH_P_IP) ||
114                   h_proto == bpf_htons(ETH_P_IPV6));
115 }
116 
117 static __always_inline void *skb_ptr(struct __sk_buff *skb, __u32 offset)
118 {
119         void *start = (void *)(unsigned long long)skb->data;
120 
121         return start + offset;
122 }
123 
124 static __always_inline void *skb_end_ptr(struct __sk_buff *skb)
125 {
126         return (void *)(unsigned long long)skb->data_end;
127 }
128 
129 static __always_inline int skb_check(struct __sk_buff *skb, void *ptr)
130 {
131         if (ptr > skb_end_ptr(skb))
132                 return -1;
133 
134         return 0;
135 }
136 
137 static __always_inline __u32 cur_time(void)
138 {
139         __u32 val = bpf_ktime_get_ns() >> 24;
140 
141         if (!val)
142                 val = 1;
143 
144         return val;
145 }
146 
147 static __always_inline __u32 ewma(__u32 *avg, __u32 val)
148 {
149         if (*avg)
150                 *avg = (*avg * 3) / 4 + (val << EWMA_SHIFT) / 4;
151         else
152                 *avg = val << EWMA_SHIFT;
153 
154         return *avg >> EWMA_SHIFT;
155 }
156 
157 static __always_inline __u8 dscp_val(struct qosify_dscp_val *val, bool ingress)
158 {
159         __u8 ival = val->ingress;
160         __u8 eval = val->egress;
161 
162         return ingress ? ival : eval;
163 }
164 
/*
 * Rewrite the IPv4 TOS/DS byte in place and patch the header checksum
 * incrementally (RFC 1624 style) instead of recomputing it.
 *
 * @offset: byte offset of the IPv4 header inside the skb
 * @mask:   bits of the old TOS byte to preserve (the ECN bits here)
 * @value:  new, pre-shifted DSCP bits to merge in
 * @force:  if false, leave the field alone when any preserved bit is set
 */
static __always_inline void
ipv4_change_dsfield(struct __sk_buff *skb, __u32 offset,
                    __u8 mask, __u8 value, bool force)
{
        struct iphdr *iph;
        __u32 check;
        __u8 dsfield;

        iph = skb_ptr(skb, offset);
        if (skb_check(skb, iph + 1))
                return;

        check = bpf_ntohs(iph->check);
        /* a set ECN bit suppresses the rewrite unless the caller forces it */
        if ((iph->tos & mask) && !force)
                return;

        /* keep the masked bits, replace the rest with the new value */
        dsfield = (iph->tos & mask) | value;
        if (iph->tos == dsfield)
                return;

        /* incremental checksum update: add the old byte back in, fold the
         * carry, subtract the new byte, then fold again */
        check += iph->tos;
        if ((check + 1) >> 16)
                check = (check + 1) & 0xffff;
        check -= dsfield;
        check += check >> 16;
        iph->check = bpf_htons(check);
        iph->tos = dsfield;
}
193 
/*
 * Rewrite the IPv6 traffic-class field in place. The traffic class
 * straddles the first two bytes of the header (4-bit version, 8-bit
 * traffic class, start of flow label), hence the shifted masks below.
 * IPv6 has no header checksum, so no checksum fixup is needed.
 *
 * Parameters mirror ipv4_change_dsfield().
 */
static __always_inline void
ipv6_change_dsfield(struct __sk_buff *skb, __u32 offset,
                    __u8 mask, __u8 value, bool force)
{
        struct ipv6hdr *ipv6h;
        __u16 *p;
        __u16 val;

        ipv6h = skb_ptr(skb, offset);
        if (skb_check(skb, ipv6h + 1))
                return;

        /* first 16 bits of the header, accessed as one word */
        p = (__u16 *)ipv6h;
        /* a set preserved (ECN) bit suppresses the rewrite unless forced */
        if (((*p >> 4) & mask) && !force)
                return;

        /* keep version, flow-label bits and the masked bits; merge the new
         * DSCP value shifted into traffic-class position (all big-endian) */
        val = (*p & bpf_htons((((__u16)mask << 4) | 0xf00f))) | bpf_htons((__u16)value << 4);
        if (val == *p)
                return;

        *p = val;
}
216 
/*
 * Parse the ethernet header at *offset and skip up to two stacked VLAN
 * tags (802.1Q / 802.1ad). Advances *offset past everything consumed.
 *
 * Returns the encapsulated ethertype (network byte order, widened to int)
 * or -1 when a header does not fit in the linear skb data.
 */
static __always_inline int
parse_ethernet(struct __sk_buff *skb, __u32 *offset)
{
        struct ethhdr *eth;
        __u16 h_proto;
        int i;

        eth = skb_ptr(skb, *offset);
        if (skb_check(skb, eth + 1))
                return -1;

        h_proto = eth->h_proto;
        *offset += sizeof(*eth);

/* unrolled so the BPF verifier sees a bounded loop */
#pragma unroll
        for (i = 0; i < 2; i++) {
                struct vlan_hdr *vlh = skb_ptr(skb, *offset);

                if (!proto_is_vlan(h_proto))
                        break;

                if (skb_check(skb, vlh + 1))
                        return -1;

                h_proto = vlh->h_vlan_encapsulated_proto;
                *offset += sizeof(*vlh);
        }

        return h_proto;
}
247 
/*
 * Derive a DSCP value from the L4 protocol and port: ICMP(v6) gets the
 * configured dscp_icmp; TCP/UDP are looked up in the per-port maps keyed
 * by source port on ingress or destination port on egress. *out_val is
 * written only when a mapping is found.
 */
static void
parse_l4proto(struct qosify_config *config, struct __sk_buff *skb,
              __u32 offset, __u8 proto, bool ingress,
              __u8 *out_val)
{
        struct udphdr *udp;
        __u32 src, dest, key;
        __u8 *value;

        udp = skb_ptr(skb, offset);
        /* only the port fields are read, so validating up to udp->len is
         * sufficient for both the TCP and UDP header layouts */
        if (skb_check(skb, &udp->len))
                return;

        if (config && (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)) {
                *out_val = config->dscp_icmp;
                return;
        }

        /* ports are kept in network byte order; NOTE(review): the port maps
         * are presumably filled with the same byte order by userspace */
        src = READ_ONCE(udp->source);
        dest = READ_ONCE(udp->dest);
        if (ingress)
                key = src;
        else
                key = dest;

        if (proto == IPPROTO_TCP) {
                value = bpf_map_lookup_elem(&tcp_ports, &key);
        } else {
                /* non-TCP/UDP protocols share the udp_ports[0] entry */
                if (proto != IPPROTO_UDP)
                        key = 0;

                value = bpf_map_lookup_elem(&udp_ports, &key);
        }

        if (value)
                *out_val = *value;
}
285 
/*
 * Per-flow bulk detection: count packets per FLOW_CHECK_INTERVAL; when the
 * count exceeds bulk_trigger_pps, mark the flow bulk for
 * bulk_trigger_timeout intervals and report config->dscp_bulk via
 * *out_val. Timestamps use cur_time() units (~16.8 ms).
 */
static __always_inline void
check_flow_bulk(struct qosify_flow_config *config, struct __sk_buff *skb,
                struct flow_bucket *flow, __u8 *out_val)
{
        bool trigger = false;
        __s32 delta;
        __u32 time;
        int segs = 1;

        /* bulk detection disabled */
        if (!config->bulk_trigger_pps)
                return;

        time = cur_time();
        /* last_update == 0 means a fresh bucket (cur_time() never returns 0) */
        if (!flow->last_update)
                goto reset;

        delta = time - flow->last_update;
        /* idle too long: treat as a brand new flow */
        if ((u32)delta > FLOW_TIMEOUT)
                goto reset;

        /* count GSO super-packets as their individual segments */
        if (skb->gso_segs)
                segs = skb->gso_segs;
        flow->pkt_count += segs;
        if (flow->pkt_count > config->bulk_trigger_pps) {
                /* +1 compensates for the per-interval decrement below */
                flow->bulk_timeout = config->bulk_trigger_timeout + 1;
                trigger = true;
        }

        /* interval elapsed: age the bulk mark (unless re-triggered just now)
         * and start a new counting interval */
        if (delta >= FLOW_CHECK_INTERVAL) {
                if (flow->bulk_timeout && !trigger)
                        flow->bulk_timeout--;

                goto clear;
        }

        goto out;

reset:
        flow->pkt_len_avg = 0;
clear:
        flow->pkt_count = 1;
        flow->last_update = time;
out:
        if (flow->bulk_timeout)
                *out_val = config->dscp_bulk;
}
332 
333 static __always_inline void
334 check_flow_prio(struct qosify_flow_config *config, struct __sk_buff *skb,
335                 struct flow_bucket *flow, __u8 *out_val)
336 {
337         int cur_len = skb->len;
338 
339         if (flow->bulk_timeout)
340                 return;
341 
342         if (!config->prio_max_avg_pkt_len)
343                 return;
344 
345         if (skb->gso_segs > 1)
346                 cur_len /= skb->gso_segs;
347 
348         if (ewma(&flow->pkt_len_avg, cur_len) <= config->prio_max_avg_pkt_len)
349                 *out_val = config->dscp_prio;
350 }
351 
352 static __always_inline void
353 check_flow(struct qosify_flow_config *config, struct __sk_buff *skb,
354            __u8 *out_val)
355 {
356         struct flow_bucket flow_data;
357         struct flow_bucket *flow;
358         __u32 hash;
359 
360         if (!config)
361                 return;
362 
363         hash = bpf_get_hash_recalc(skb);
364         flow = bpf_map_lookup_elem(&flow_map, &hash);
365         if (!flow) {
366                 memset(&flow_data, 0, sizeof(flow_data));
367                 bpf_map_update_elem(&flow_map, &hash, &flow_data, BPF_ANY);
368                 flow = bpf_map_lookup_elem(&flow_map, &hash);
369                 if (!flow)
370                         return;
371         }
372 
373         check_flow_bulk(config, skb, flow, out_val);
374         check_flow_prio(config, skb, flow, out_val);
375 }
376 
/*
 * Parse the IPv4 header at *offset: pull enough linear data for the
 * (variable length) IP header plus the L4 ports, advance *offset past the
 * IP header, classify via parse_l4proto() into *out_val, and return the
 * ipv4_map entry for the source (ingress) or destination (egress) address.
 * Returns NULL when the header cannot be validated.
 */
static __always_inline struct qosify_ip_map_val *
parse_ipv4(struct qosify_config *config, struct __sk_buff *skb, __u32 *offset,
           bool ingress, __u8 *out_val)
{
        struct iphdr *iph;
        __u8 ipproto;
        int hdr_len;
        void *key;

        iph = skb_ptr(skb, *offset);
        if (skb_check(skb, iph + 1))
                return NULL;

        /* ihl is in 32-bit words; covers IP options too */
        hdr_len = iph->ihl * 4;
        if (bpf_skb_pull_data(skb, *offset + hdr_len + sizeof(struct udphdr)))
                return NULL;

        /* bpf_skb_pull_data may relocate packet data: re-derive the pointer
         * and re-validate before any further access */
        iph = skb_ptr(skb, *offset);
        *offset += hdr_len;

        if (skb_check(skb, (void *)(iph + 1)))
                return NULL;

        ipproto = iph->protocol;
        parse_l4proto(config, skb, *offset, ipproto, ingress, out_val);

        if (ingress)
                key = &iph->saddr;
        else
                key = &iph->daddr;

        return bpf_map_lookup_elem(&ipv4_map, key);
}
410 
/*
 * IPv6 counterpart of parse_ipv4(): fixed-size header, keyed into
 * ipv6_map by source (ingress) or destination (egress) address.
 * NOTE(review): extension headers are not walked — iph->nexthdr is used
 * as the L4 protocol directly.
 */
static __always_inline struct qosify_ip_map_val *
parse_ipv6(struct qosify_config *config, struct __sk_buff *skb, __u32 *offset,
           bool ingress, __u8 *out_val)
{
        struct ipv6hdr *iph;
        __u8 ipproto;
        void *key;

        /* make sure the IPv6 header and the L4 ports are linear */
        if (bpf_skb_pull_data(skb, *offset + sizeof(*iph) + sizeof(struct udphdr)))
                return NULL;

        /* bpf_skb_pull_data may relocate packet data: re-derive the pointer */
        iph = skb_ptr(skb, *offset);
        *offset += sizeof(*iph);

        if (skb_check(skb, (void *)(iph + 1)))
                return NULL;

        ipproto = iph->nexthdr;
        if (ingress)
                key = &iph->saddr;
        else
                key = &iph->daddr;

        parse_l4proto(config, skb, *offset, ipproto, ingress, out_val);

        return bpf_map_lookup_elem(&ipv6_map, key);
}
438 
439 static __always_inline int
440 dscp_lookup_class(uint8_t *dscp, bool ingress, struct qosify_class **out_class)
441 {
442         struct qosify_class *class;
443         __u8 fallback_flag;
444         __u32 key;
445 
446         if (!(*dscp & QOSIFY_DSCP_CLASS_FLAG))
447                 return 0;
448 
449         fallback_flag = *dscp & QOSIFY_DSCP_FALLBACK_FLAG;
450         key = *dscp & QOSIFY_DSCP_VALUE_MASK;
451         class = bpf_map_lookup_elem(&class_map, &key);
452         if (!class)
453                 return -1;
454 
455         if (!(class->flags & QOSIFY_CLASS_FLAG_PRESENT))
456                 return -1;
457 
458         *dscp = dscp_val(&class->val, ingress);
459         *dscp |= fallback_flag;
460         *out_class = class;
461 
462         return 0;
463 }
464 
465 SEC("tc")
466 int classify(struct __sk_buff *skb)
467 {
468         bool ingress = module_flags & QOSIFY_INGRESS;
469         struct qosify_config *config;
470         struct qosify_class *class = NULL;
471         struct qosify_ip_map_val *ip_val;
472         __u32 offset = 0;
473         __u32 iph_offset;
474         void *iph;
475         __u8 dscp;
476         bool force;
477         int type;
478 
479         config = get_config();
480         if (!config)
481                 return TC_ACT_UNSPEC;
482 
483         if (module_flags & QOSIFY_IP_ONLY)
484                 type = skb->protocol;
485         else
486                 type = parse_ethernet(skb, &offset);
487 
488         iph_offset = offset;
489         if (type == bpf_htons(ETH_P_IP))
490                 ip_val = parse_ipv4(config, skb, &offset, ingress, &dscp);
491         else if (type == bpf_htons(ETH_P_IPV6))
492                 ip_val = parse_ipv6(config, skb, &offset, ingress, &dscp);
493         else
494                 return TC_ACT_UNSPEC;
495 
496         if (ip_val) {
497                 if (!ip_val->seen)
498                         ip_val->seen = 1;
499                 dscp = ip_val->dscp;
500         }
501 
502         if (dscp_lookup_class(&dscp, ingress, &class))
503                 return TC_ACT_UNSPEC;
504 
505         if (class) {
506                 check_flow(&class->config, skb, &dscp);
507 
508                 if (dscp_lookup_class(&dscp, ingress, &class))
509                         return TC_ACT_UNSPEC;
510         }
511 
512         dscp &= GENMASK(5, 0);
513         dscp <<= 2;
514         force = !(dscp & QOSIFY_DSCP_FALLBACK_FLAG);
515 
516         if (type == bpf_htons(ETH_P_IP))
517                 ipv4_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
518         else if (type == bpf_htons(ETH_P_IPV6))
519                 ipv6_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
520 
521         return TC_ACT_UNSPEC;
522 }
523 
524 char _license[] SEC("license") = "GPL";
525 

This page was automatically generated by LXR 0.3.1.  •  OpenWrt