• source navigation  • diff markup  • identifier search  • freetext search  • 

Sources/qosify/qosify-bpf.c

  1 // SPDX-License-Identifier: GPL-2.0+
  2 /*
  3  * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
  4  */
  5 #define KBUILD_MODNAME "foo"
  6 #include <uapi/linux/bpf.h>
  7 #include <uapi/linux/if_ether.h>
  8 #include <uapi/linux/if_packet.h>
  9 #include <uapi/linux/ip.h>
 10 #include <uapi/linux/ipv6.h>
 11 #include <uapi/linux/in.h>
 12 #include <uapi/linux/tcp.h>
 13 #include <uapi/linux/udp.h>
 14 #include <uapi/linux/filter.h>
 15 #include <uapi/linux/pkt_cls.h>
 16 #include <linux/ip.h>
 17 #include <net/ipv6.h>
 18 #include <bpf/bpf_helpers.h>
 19 #include <bpf/bpf_endian.h>
 20 #include "bpf_skb_utils.h"
 21 #include "qosify-bpf.h"
 22 
/* Low two bits of the IPv4 TOS / IPv6 traffic-class byte (the ECN field). */
#define INET_ECN_MASK 3

/*
 * Flow timing constants. They are expressed in the unit returned by
 * cur_time(): nanoseconds shifted right by 24 (about 16.8 ms per tick).
 */
#define FLOW_CHECK_INTERVAL     ((u32)((1000000000ULL) >> 24))  /* ~1 second */
#define FLOW_TIMEOUT            ((u32)((30ULL * 1000000000ULL) >> 24))  /* ~30 seconds */
#define FLOW_BULK_TIMEOUT       5  /* not referenced by the code visible in this file */

/* Number of fractional bits kept in the fixed-point average used by ewma(). */
#define EWMA_SHIFT              12

/*
 * Per-program flag word; classify() tests QOSIFY_INGRESS and QOSIFY_IP_ONLY
 * against it.
 * NOTE(review): declared const volatile with a zero initialiser, presumably so
 * the loader can patch the value before the program is loaded - confirm
 * against the userspace loader.
 */
const volatile static uint32_t module_flags = 0;
 32 
/*
 * Per-flow state stored in flow_map and updated by check_flow_bulk() and
 * check_flow_prio().
 */
struct flow_bucket {
        /* cur_time() tick at which the current counting window started;
         * 0 means the bucket has never been initialised. */
        __u32 last_update;
        /* Fixed-point (EWMA_SHIFT fractional bits) moving average of the
         * per-packet length, maintained through ewma(). */
        __u32 pkt_len_avg;
        /* Packets (GSO segments) counted in the current window. */
        __u32 pkt_count;
        /* Non-zero while the flow is classified as bulk; decremented once
         * per elapsed check interval in which the trigger did not fire. */
        __u32 bulk_timeout;
};
 39 
/*
 * Single-entry array holding the global struct qosify_config; read by
 * get_config() with key 0.
 * The pinning value 1 corresponds to libbpf's LIBBPF_PIN_BY_NAME.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct qosify_config);
        __uint(max_entries, 1);
} config SEC(".maps");
 47 
/*
 * TCP port table with one __u8 slot per 16-bit port value.
 * parse_l4proto() indexes it with the port field exactly as read from the
 * packet header (no byte swap is applied in this file): the source port on
 * ingress, the destination port on egress. A hit supplies the initial
 * DSCP/class byte for the packet.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, __u8);
        __uint(max_entries, 1 << 16);
} tcp_ports SEC(".maps");
 55 
/*
 * UDP port table with one __u8 slot per 16-bit port value.
 * parse_l4proto() indexes it with the port field exactly as read from the
 * packet header (source port on ingress, destination port on egress).
 * Protocols that are neither TCP nor UDP are looked up here under key 0.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, __u8);
        __uint(max_entries, 1 << 16);
} udp_ports SEC(".maps");
 63 
/*
 * LRU hash of per-flow state. check_flow() keys it with the value returned
 * by bpf_get_hash_recalc() for the packet and creates a zeroed bucket on a
 * miss.
 */
struct {
        __uint(type, BPF_MAP_TYPE_LRU_HASH);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct flow_bucket);
        __uint(max_entries, QOSIFY_FLOW_BUCKETS);
} flow_map SEC(".maps");
 71 
/*
 * Per-IPv4-address overrides. parse_ipv4() looks up the source address on
 * ingress and the destination address on egress; on a hit classify() marks
 * the entry as seen and takes its dscp value in place of the port-based one.
 */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(pinning, 1);
        __uint(key_size, sizeof(struct in_addr));
        __type(value, struct qosify_ip_map_val);
        __uint(max_entries, 100000);
        __uint(map_flags, BPF_F_NO_PREALLOC);
} ipv4_map SEC(".maps");
 80 
/*
 * Per-IPv6-address overrides. parse_ipv6() looks up the source address on
 * ingress and the destination address on egress; on a hit classify() marks
 * the entry as seen and takes its dscp value in place of the port-based one.
 */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(pinning, 1);
        __uint(key_size, sizeof(struct in6_addr));
        __type(value, struct qosify_ip_map_val);
        __uint(max_entries, 100000);
        __uint(map_flags, BPF_F_NO_PREALLOC);
} ipv6_map SEC(".maps");
 89 
/*
 * Table of traffic classes. dscp_lookup_class() indexes it with
 * (dscp & QOSIFY_DSCP_VALUE_MASK) whenever QOSIFY_DSCP_CLASS_FLAG is set in
 * the dscp byte; entries lacking QOSIFY_CLASS_FLAG_PRESENT are treated as
 * missing.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct qosify_class);
        __uint(max_entries, QOSIFY_MAX_CLASS_ENTRIES +
                            QOSIFY_DEFAULT_CLASS_ENTRIES);
} class_map SEC(".maps");
 98 
 99 static struct qosify_config *get_config(void)
100 {
101         __u32 key = 0;
102 
103         return bpf_map_lookup_elem(&config, &key);
104 }
105 
106 static __always_inline __u32 cur_time(void)
107 {
108         __u32 val = bpf_ktime_get_ns() >> 24;
109 
110         if (!val)
111                 val = 1;
112 
113         return val;
114 }
115 
116 static __always_inline __u32 ewma(__u32 *avg, __u32 val)
117 {
118         if (*avg)
119                 *avg = (*avg * 3) / 4 + (val << EWMA_SHIFT) / 4;
120         else
121                 *avg = val << EWMA_SHIFT;
122 
123         return *avg >> EWMA_SHIFT;
124 }
125 
126 static __always_inline __u8 dscp_val(struct qosify_dscp_val *val, bool ingress)
127 {
128         __u8 ival = val->ingress;
129         __u8 eval = val->egress;
130 
131         return ingress ? ival : eval;
132 }
133 
134 static __always_inline void
135 ipv4_change_dsfield(struct __sk_buff *skb, __u32 offset,
136                     __u8 mask, __u8 value, bool force)
137 {
138         struct iphdr *iph;
139         __u32 check;
140         __u8 dsfield;
141 
142         iph = skb_ptr(skb, offset, sizeof(*iph));
143         if (!iph)
144                 return;
145 
146         check = bpf_ntohs(iph->check);
147         if ((iph->tos & mask) && !force)
148                 return;
149 
150         dsfield = (iph->tos & mask) | value;
151         if (iph->tos == dsfield)
152                 return;
153 
154         check += iph->tos;
155         if ((check + 1) >> 16)
156                 check = (check + 1) & 0xffff;
157         check -= dsfield;
158         check += check >> 16;
159         iph->check = bpf_htons(check);
160         iph->tos = dsfield;
161 }
162 
163 static __always_inline void
164 ipv6_change_dsfield(struct __sk_buff *skb, __u32 offset,
165                     __u8 mask, __u8 value, bool force)
166 {
167         struct ipv6hdr *ipv6h;
168         __u16 *p;
169         __u16 val;
170 
171         ipv6h = skb_ptr(skb, offset, sizeof(*ipv6h));
172         if (!ipv6h)
173                 return;
174 
175         p = (__u16 *)ipv6h;
176         if (((*p >> 4) & mask) && !force)
177                 return;
178 
179         val = (*p & bpf_htons((((__u16)mask << 4) | 0xf00f))) | bpf_htons((__u16)value << 4);
180         if (val == *p)
181                 return;
182 
183         *p = val;
184 }
185 
186 static void
187 parse_l4proto(struct qosify_config *config, struct skb_parser_info *info,
188               bool ingress, __u8 *out_val)
189 {
190         struct udphdr *udp;
191         __u32 src, dest, key;
192         __u8 *value;
193         __u8 proto = info->proto;
194 
195         udp = skb_info_ptr(info, sizeof(*udp));
196         if (!udp)
197                 return;
198 
199         if (config && (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)) {
200                 *out_val = config->dscp_icmp;
201                 return;
202         }
203 
204         src = READ_ONCE(udp->source);
205         dest = READ_ONCE(udp->dest);
206         if (ingress)
207                 key = src;
208         else
209                 key = dest;
210 
211         if (proto == IPPROTO_TCP) {
212                 value = bpf_map_lookup_elem(&tcp_ports, &key);
213         } else {
214                 if (proto != IPPROTO_UDP)
215                         key = 0;
216 
217                 value = bpf_map_lookup_elem(&udp_ports, &key);
218         }
219 
220         if (value)
221                 *out_val = *value;
222 }
223 
224 static __always_inline bool
225 check_flow_bulk(struct qosify_flow_config *config, struct __sk_buff *skb,
226                 struct flow_bucket *flow, __u8 *out_val)
227 {
228         bool trigger = false;
229         __s32 delta;
230         __u32 time;
231         int segs = 1;
232         bool ret = false;
233 
234         if (!config->bulk_trigger_pps)
235                 return false;
236 
237         time = cur_time();
238         if (!flow->last_update)
239                 goto reset;
240 
241         delta = time - flow->last_update;
242         if ((u32)delta > FLOW_TIMEOUT)
243                 goto reset;
244 
245         if (skb->gso_segs)
246                 segs = skb->gso_segs;
247         flow->pkt_count += segs;
248         if (flow->pkt_count > config->bulk_trigger_pps) {
249                 flow->bulk_timeout = config->bulk_trigger_timeout + 1;
250                 trigger = true;
251         }
252 
253         if (delta >= FLOW_CHECK_INTERVAL) {
254                 if (flow->bulk_timeout && !trigger)
255                         flow->bulk_timeout--;
256 
257                 goto clear;
258         }
259 
260         goto out;
261 
262 reset:
263         flow->pkt_len_avg = 0;
264 clear:
265         flow->pkt_count = 1;
266         flow->last_update = time;
267 out:
268         if (flow->bulk_timeout) {
269                 *out_val = config->dscp_bulk;
270                 return true;
271         }
272 
273         return false;
274 }
275 
276 static __always_inline bool
277 check_flow_prio(struct qosify_flow_config *config, struct __sk_buff *skb,
278                 struct flow_bucket *flow, __u8 *out_val)
279 {
280         int cur_len = skb->len;
281 
282         if (flow->bulk_timeout)
283                 return false;
284 
285         if (!config->prio_max_avg_pkt_len)
286                 return false;
287 
288         if (skb->gso_segs > 1)
289                 cur_len /= skb->gso_segs;
290 
291         if (ewma(&flow->pkt_len_avg, cur_len) <= config->prio_max_avg_pkt_len) {
292                 *out_val = config->dscp_prio;
293                 return true;
294         }
295 
296         return false;
297 }
298 
299 static __always_inline bool
300 check_flow(struct qosify_flow_config *config, struct __sk_buff *skb,
301            __u8 *out_val)
302 {
303         struct flow_bucket flow_data;
304         struct flow_bucket *flow;
305         __u32 hash;
306         bool ret = false;
307 
308         if (!config)
309                 return false;
310 
311         if (!config->prio_max_avg_pkt_len && !config->bulk_trigger_pps)
312                 return false;
313 
314         hash = bpf_get_hash_recalc(skb);
315         flow = bpf_map_lookup_elem(&flow_map, &hash);
316         if (!flow) {
317                 memset(&flow_data, 0, sizeof(flow_data));
318                 bpf_map_update_elem(&flow_map, &hash, &flow_data, BPF_ANY);
319                 flow = bpf_map_lookup_elem(&flow_map, &hash);
320                 if (!flow)
321                         return false;
322         }
323 
324         ret |= check_flow_bulk(config, skb, flow, out_val);
325         ret |= check_flow_prio(config, skb, flow, out_val);
326 
327         return ret;
328 }
329 
330 static __always_inline struct qosify_ip_map_val *
331 parse_ipv4(struct qosify_config *config, struct skb_parser_info *info,
332            bool ingress, __u8 *out_val)
333 {
334         struct iphdr *iph;
335         __u8 ipproto;
336         int hdr_len;
337         void *key;
338 
339         iph = skb_parse_ipv4(info, sizeof(struct udphdr));
340         if (!iph)
341                 return NULL;
342 
343         parse_l4proto(config, info, ingress, out_val);
344 
345         if (ingress)
346                 key = &iph->saddr;
347         else
348                 key = &iph->daddr;
349 
350         return bpf_map_lookup_elem(&ipv4_map, key);
351 }
352 
353 static __always_inline struct qosify_ip_map_val *
354 parse_ipv6(struct qosify_config *config, struct skb_parser_info *info,
355            bool ingress, __u8 *out_val)
356 {
357         struct ipv6hdr *iph;
358         __u8 ipproto;
359         void *key;
360 
361         iph = skb_parse_ipv6(info, sizeof(struct udphdr));
362         if (!iph)
363                 return NULL;
364 
365         if (ingress)
366                 key = &iph->saddr;
367         else
368                 key = &iph->daddr;
369 
370         parse_l4proto(config, info, ingress, out_val);
371 
372         return bpf_map_lookup_elem(&ipv6_map, key);
373 }
374 
375 static __always_inline int
376 dscp_lookup_class(uint8_t *dscp, bool ingress, struct qosify_class **out_class)
377 {
378         struct qosify_class *class;
379         __u8 fallback_flag;
380         __u32 key;
381 
382         if (!(*dscp & QOSIFY_DSCP_CLASS_FLAG))
383                 return 0;
384 
385         fallback_flag = *dscp & QOSIFY_DSCP_FALLBACK_FLAG;
386         key = *dscp & QOSIFY_DSCP_VALUE_MASK;
387         class = bpf_map_lookup_elem(&class_map, &key);
388         if (!class)
389                 return -1;
390 
391         if (!(class->flags & QOSIFY_CLASS_FLAG_PRESENT))
392                 return -1;
393 
394         *dscp = dscp_val(&class->val, ingress);
395         *dscp |= fallback_flag;
396         *out_class = class;
397 
398         return 0;
399 }
400 
401 SEC("tc")
402 int classify(struct __sk_buff *skb)
403 {
404         struct skb_parser_info info;
405         bool ingress = module_flags & QOSIFY_INGRESS;
406         struct qosify_config *config;
407         struct qosify_class *class = NULL;
408         struct qosify_ip_map_val *ip_val;
409         __u32 iph_offset;
410         __u8 dscp = 0;
411         void *iph;
412         bool force;
413         int type;
414 
415         config = get_config();
416         if (!config)
417                 return TC_ACT_UNSPEC;
418 
419         skb_parse_init(&info, skb);
420         if (module_flags & QOSIFY_IP_ONLY) {
421                 type = info.proto = skb->protocol;
422         } else if (skb_parse_ethernet(&info)) {
423                 skb_parse_vlan(&info);
424                 skb_parse_vlan(&info);
425                 type = info.proto;
426         } else {
427                 return TC_ACT_UNSPEC;
428         }
429 
430         iph_offset = info.offset;
431         if (type == bpf_htons(ETH_P_IP))
432                 ip_val = parse_ipv4(config, &info, ingress, &dscp);
433         else if (type == bpf_htons(ETH_P_IPV6))
434                 ip_val = parse_ipv6(config, &info, ingress, &dscp);
435         else
436                 return TC_ACT_UNSPEC;
437 
438         if (ip_val) {
439                 if (!ip_val->seen)
440                         ip_val->seen = 1;
441                 dscp = ip_val->dscp;
442         }
443 
444         if (dscp_lookup_class(&dscp, ingress, &class))
445                 return TC_ACT_UNSPEC;
446 
447         if (class) {
448                 if (check_flow(&class->config, skb, &dscp) &&
449                     dscp_lookup_class(&dscp, ingress, &class))
450                         return TC_ACT_UNSPEC;
451         }
452 
453         dscp &= GENMASK(5, 0);
454         dscp <<= 2;
455         force = !(dscp & QOSIFY_DSCP_FALLBACK_FLAG);
456 
457         if (type == bpf_htons(ETH_P_IP))
458                 ipv4_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
459         else if (type == bpf_htons(ETH_P_IPV6))
460                 ipv6_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
461 
462         return TC_ACT_UNSPEC;
463 }
464 
/* License string placed in the "license" ELF section of the BPF object. */
char _license[] SEC("license") = "GPL";
466 

This page was automatically generated by LXR 0.3.1.  •  OpenWrt