• source navigation  • diff markup  • identifier search  • freetext search  • 

Sources/qosify/qosify-bpf.c

  1 // SPDX-License-Identifier: GPL-2.0+
  2 /*
  3  * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
  4  */
  5 #define KBUILD_MODNAME "foo"
  6 #include <uapi/linux/bpf.h>
  7 #include <uapi/linux/if_ether.h>
  8 #include <uapi/linux/if_packet.h>
  9 #include <uapi/linux/ip.h>
 10 #include <uapi/linux/ipv6.h>
 11 #include <uapi/linux/in.h>
 12 #include <uapi/linux/tcp.h>
 13 #include <uapi/linux/udp.h>
 14 #include <uapi/linux/filter.h>
 15 #include <uapi/linux/pkt_cls.h>
 16 #include <linux/ip.h>
 17 #include <net/ipv6.h>
 18 #include <bpf/bpf_helpers.h>
 19 #include <bpf/bpf_endian.h>
 20 #include "bpf_skb_utils.h"
 21 #include "qosify-bpf.h"
 22 
/*
 * Mask handed to the *_change_dsfield() helpers by classify(): the bits of
 * the IPv4 TOS / IPv6 traffic class byte that are kept when the rest of the
 * byte is rewritten.
 */
#define INET_ECN_MASK 3

/*
 * Flow timing constants, expressed in the unit returned by cur_time()
 * (nanoseconds >> 24).
 */
#define FLOW_CHECK_INTERVAL     ((u32)((1000000000ULL) >> 24)) /* 1 second */
#define FLOW_TIMEOUT            ((u32)((30ULL * 1000000000ULL) >> 24)) /* 30 seconds */
#define FLOW_BULK_TIMEOUT       5 /* not referenced by the code visible in this file */

/* Number of fractional bits ewma() keeps in flow_bucket.pkt_len_avg. */
#define EWMA_SHIFT              12

/*
 * Per-instance flag bits; classify() tests QOSIFY_INGRESS and QOSIFY_IP_ONLY.
 * NOTE(review): "const volatile" with a zero initialiser suggests the value is
 * patched by the loader before the program is loaded - confirm in the loader.
 */
const volatile static uint32_t module_flags = 0;
 32 
/* Per-flow state stored in flow_map and updated by the check_flow_*() helpers. */
struct flow_bucket {
        /* cur_time() at the start of the current window; 0 means "not initialised" */
        __u32 last_update;
        /* running packet length average from ewma(), scaled by 1 << EWMA_SHIFT */
        __u32 pkt_len_avg;
        /* packets (GSO segments) counted in the current window */
        __u32 pkt_count;
        /* remaining check intervals during which the flow is treated as bulk; 0 = not bulk */
        __u32 bulk_timeout;
};
 39 
/*
 * Single-slot array map holding the global struct qosify_config;
 * get_config() reads index 0.
 * pinning = 1 is libbpf's LIBBPF_PIN_BY_NAME.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct qosify_config);
        __uint(max_entries, 1);
} config SEC(".maps");
 47 
/*
 * One byte per 16-bit TCP port value. parse_l4proto() indexes it with the
 * port field exactly as read from the packet header (no byte-order
 * conversion) and copies the entry into its output DSCP value.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, __u8);
        __uint(max_entries, 1 << 16);
} tcp_ports SEC(".maps");
 55 
/*
 * One byte per 16-bit UDP port value, indexed like tcp_ports.
 * parse_l4proto() also reads slot 0 of this map for protocols that are
 * neither TCP nor UDP.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, __u8);
        __uint(max_entries, 1 << 16);
} udp_ports SEC(".maps");
 63 
/*
 * LRU hash of per-flow state. check_flow() keys it with the value returned
 * by bpf_get_hash_recalc() for the packet.
 */
struct {
        __uint(type, BPF_MAP_TYPE_LRU_HASH);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct flow_bucket);
        __uint(max_entries, QOSIFY_FLOW_BUCKETS);
} flow_map SEC(".maps");
 71 
/*
 * Per-address overrides for IPv4. parse_ipv4() looks up the source address
 * on ingress and the destination address on egress.
 */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(pinning, 1);
        __uint(key_size, sizeof(struct in_addr));
        __type(value, struct qosify_ip_map_val);
        __uint(max_entries, 100000);
        __uint(map_flags, BPF_F_NO_PREALLOC);
} ipv4_map SEC(".maps");
 80 
/*
 * Per-address overrides for IPv6. parse_ipv6() looks up the source address
 * on ingress and the destination address on egress.
 */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(pinning, 1);
        __uint(key_size, sizeof(struct in6_addr));
        __type(value, struct qosify_ip_map_val);
        __uint(max_entries, 100000);
        __uint(map_flags, BPF_F_NO_PREALLOC);
} ipv6_map SEC(".maps");
 89 
/*
 * Class table. dscp_lookup_class() indexes it with the low
 * QOSIFY_DSCP_VALUE_MASK bits of a DSCP byte that has
 * QOSIFY_DSCP_CLASS_FLAG set.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(pinning, 1);
        __type(key, __u32);
        __type(value, struct qosify_class);
        __uint(max_entries, QOSIFY_MAX_CLASS_ENTRIES +
                            QOSIFY_DEFAULT_CLASS_ENTRIES);
} class_map SEC(".maps");
 98 
 99 static struct qosify_config *get_config(void)
100 {
101         __u32 key = 0;
102 
103         return bpf_map_lookup_elem(&config, &key);
104 }
105 
106 static __always_inline __u32 cur_time(void)
107 {
108         __u32 val = bpf_ktime_get_ns() >> 24;
109 
110         if (!val)
111                 val = 1;
112 
113         return val;
114 }
115 
116 static __always_inline __u32 ewma(__u32 *avg, __u32 val)
117 {
118         if (*avg)
119                 *avg = (*avg * 3) / 4 + (val << EWMA_SHIFT) / 4;
120         else
121                 *avg = val << EWMA_SHIFT;
122 
123         return *avg >> EWMA_SHIFT;
124 }
125 
126 static __always_inline __u8 dscp_val(struct qosify_dscp_val *val, bool ingress)
127 {
128         __u8 ival = val->ingress;
129         __u8 eval = val->egress;
130 
131         return ingress ? ival : eval;
132 }
133 
134 static __always_inline void
135 ipv4_change_dsfield(struct __sk_buff *skb, __u32 offset,
136                     __u8 mask, __u8 value, bool force)
137 {
138         struct iphdr *iph;
139         __u32 check;
140         __u8 dsfield;
141 
142         iph = skb_ptr(skb, offset, sizeof(*iph));
143         if (!iph)
144                 return;
145 
146         check = bpf_ntohs(iph->check);
147         if ((iph->tos & mask) && !force)
148                 return;
149 
150         dsfield = (iph->tos & mask) | value;
151         if (iph->tos == dsfield)
152                 return;
153 
154         check += iph->tos;
155         if ((check + 1) >> 16)
156                 check = (check + 1) & 0xffff;
157         check -= dsfield;
158         check += check >> 16;
159         iph->check = bpf_htons(check);
160         iph->tos = dsfield;
161 }
162 
163 static __always_inline void
164 ipv6_change_dsfield(struct __sk_buff *skb, __u32 offset,
165                     __u8 mask, __u8 value, bool force)
166 {
167         struct ipv6hdr *ipv6h;
168         __u16 *p;
169         __u16 val;
170 
171         ipv6h = skb_ptr(skb, offset, sizeof(*ipv6h));
172         if (!ipv6h)
173                 return;
174 
175         p = (__u16 *)ipv6h;
176         if (((*p >> 4) & mask) && !force)
177                 return;
178 
179         val = (*p & bpf_htons((((__u16)mask << 4) | 0xf00f))) | bpf_htons((__u16)value << 4);
180         if (val == *p)
181                 return;
182 
183         *p = val;
184 }
185 
186 static void
187 parse_l4proto(struct qosify_config *config, struct skb_parser_info *info,
188               bool ingress, __u8 *out_val)
189 {
190         struct udphdr *udp;
191         __u32 src, dest, key;
192         __u8 *value;
193         __u8 proto = info->proto;
194 
195         udp = skb_info_ptr(info, sizeof(*udp));
196         if (!udp)
197                 return;
198 
199         if (config && (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)) {
200                 *out_val = config->dscp_icmp;
201                 return;
202         }
203 
204         src = READ_ONCE(udp->source);
205         dest = READ_ONCE(udp->dest);
206         if (ingress)
207                 key = src;
208         else
209                 key = dest;
210 
211         if (proto == IPPROTO_TCP) {
212                 value = bpf_map_lookup_elem(&tcp_ports, &key);
213         } else {
214                 if (proto != IPPROTO_UDP)
215                         key = 0;
216 
217                 value = bpf_map_lookup_elem(&udp_ports, &key);
218         }
219 
220         if (value)
221                 *out_val = *value;
222 }
223 
224 static __always_inline bool
225 check_flow_bulk(struct qosify_flow_config *config, struct __sk_buff *skb,
226                 struct flow_bucket *flow, __u8 *out_val)
227 {
228         bool trigger = false;
229         __s32 delta;
230         __u32 time;
231         int segs = 1;
232         bool ret = false;
233 
234         if (!config->bulk_trigger_pps)
235                 return false;
236 
237         time = cur_time();
238         if (!flow->last_update)
239                 goto reset;
240 
241         delta = time - flow->last_update;
242         if ((u32)delta > FLOW_TIMEOUT)
243                 goto reset;
244 
245         if (skb->gso_segs)
246                 segs = skb->gso_segs;
247         flow->pkt_count += segs;
248         if (flow->pkt_count > config->bulk_trigger_pps) {
249                 flow->bulk_timeout = config->bulk_trigger_timeout + 1;
250                 trigger = true;
251         }
252 
253         if (delta >= FLOW_CHECK_INTERVAL) {
254                 if (flow->bulk_timeout && !trigger)
255                         flow->bulk_timeout--;
256 
257                 goto clear;
258         }
259 
260         goto out;
261 
262 reset:
263         flow->pkt_len_avg = 0;
264 clear:
265         flow->pkt_count = 1;
266         flow->last_update = time;
267 out:
268         if (flow->bulk_timeout) {
269                 *out_val = config->dscp_bulk;
270                 return true;
271         }
272 
273         return false;
274 }
275 
276 static __always_inline bool
277 check_flow_prio(struct qosify_flow_config *config, struct __sk_buff *skb,
278                 struct flow_bucket *flow, __u8 *out_val)
279 {
280         int cur_len = skb->len;
281 
282         if (flow->bulk_timeout)
283                 return false;
284 
285         if (!config->prio_max_avg_pkt_len)
286                 return false;
287 
288         if (skb->gso_segs > 1)
289                 cur_len /= skb->gso_segs;
290 
291         if (ewma(&flow->pkt_len_avg, cur_len) <= config->prio_max_avg_pkt_len) {
292                 *out_val = config->dscp_prio;
293                 return true;
294         }
295 
296         return false;
297 }
298 
299 static __always_inline bool
300 check_flow(struct qosify_flow_config *config, struct __sk_buff *skb,
301            __u8 *out_val)
302 {
303         struct flow_bucket flow_data;
304         struct flow_bucket *flow;
305         __u32 hash;
306         bool ret = false;
307 
308         if (!config)
309                 return false;
310 
311         if (!config->prio_max_avg_pkt_len && !config->bulk_trigger_pps)
312                 return false;
313 
314         hash = bpf_get_hash_recalc(skb);
315         flow = bpf_map_lookup_elem(&flow_map, &hash);
316         if (!flow) {
317                 memset(&flow_data, 0, sizeof(flow_data));
318                 bpf_map_update_elem(&flow_map, &hash, &flow_data, BPF_ANY);
319                 flow = bpf_map_lookup_elem(&flow_map, &hash);
320                 if (!flow)
321                         return false;
322         }
323 
324         ret |= check_flow_bulk(config, skb, flow, out_val);
325         ret |= check_flow_prio(config, skb, flow, out_val);
326 
327         return ret;
328 }
329 
330 static __always_inline struct qosify_ip_map_val *
331 parse_ipv4(struct qosify_config *config, struct skb_parser_info *info,
332            bool ingress, __u8 *out_val)
333 {
334         struct iphdr *iph;
335         __u8 ipproto;
336         int hdr_len;
337         void *key;
338 
339         iph = skb_parse_ipv4(info, sizeof(struct udphdr));
340         if (!iph)
341                 return NULL;
342 
343         parse_l4proto(config, info, ingress, out_val);
344 
345         if (ingress)
346                 key = &iph->saddr;
347         else
348                 key = &iph->daddr;
349 
350         return bpf_map_lookup_elem(&ipv4_map, key);
351 }
352 
353 static __always_inline struct qosify_ip_map_val *
354 parse_ipv6(struct qosify_config *config, struct skb_parser_info *info,
355            bool ingress, __u8 *out_val)
356 {
357         struct ipv6hdr *iph;
358         __u8 ipproto;
359         void *key;
360 
361         iph = skb_parse_ipv6(info, sizeof(struct udphdr));
362         if (!iph)
363                 return NULL;
364 
365         if (ingress)
366                 key = &iph->saddr;
367         else
368                 key = &iph->daddr;
369 
370         parse_l4proto(config, info, ingress, out_val);
371 
372         return bpf_map_lookup_elem(&ipv6_map, key);
373 }
374 
375 static __always_inline int
376 dscp_lookup_class(uint8_t *dscp, bool ingress, struct qosify_class **out_class,
377                   bool counter)
378 {
379         struct qosify_class *class;
380         __u8 fallback_flag;
381         __u32 key;
382 
383         if (!(*dscp & QOSIFY_DSCP_CLASS_FLAG))
384                 return 0;
385 
386         fallback_flag = *dscp & QOSIFY_DSCP_FALLBACK_FLAG;
387         key = *dscp & QOSIFY_DSCP_VALUE_MASK;
388         class = bpf_map_lookup_elem(&class_map, &key);
389         if (!class)
390                 return -1;
391 
392         if (!(class->flags & QOSIFY_CLASS_FLAG_PRESENT))
393                 return -1;
394 
395         if (counter)
396             class->packets++;
397         *dscp = dscp_val(&class->val, ingress);
398         *dscp |= fallback_flag;
399         *out_class = class;
400 
401         return 0;
402 }
403 
404 SEC("tc")
405 int classify(struct __sk_buff *skb)
406 {
407         struct skb_parser_info info;
408         bool ingress = module_flags & QOSIFY_INGRESS;
409         struct qosify_config *config;
410         struct qosify_class *class = NULL;
411         struct qosify_ip_map_val *ip_val;
412         __u32 iph_offset;
413         __u8 dscp = 0;
414         void *iph;
415         bool force;
416         int type;
417 
418         config = get_config();
419         if (!config)
420                 return TC_ACT_UNSPEC;
421 
422         skb_parse_init(&info, skb);
423         if (module_flags & QOSIFY_IP_ONLY) {
424                 type = info.proto = skb->protocol;
425         } else if (skb_parse_ethernet(&info)) {
426                 skb_parse_vlan(&info);
427                 skb_parse_vlan(&info);
428                 type = info.proto;
429         } else {
430                 return TC_ACT_UNSPEC;
431         }
432 
433         iph_offset = info.offset;
434         if (type == bpf_htons(ETH_P_IP))
435                 ip_val = parse_ipv4(config, &info, ingress, &dscp);
436         else if (type == bpf_htons(ETH_P_IPV6))
437                 ip_val = parse_ipv6(config, &info, ingress, &dscp);
438         else
439                 return TC_ACT_UNSPEC;
440 
441         if (ip_val) {
442                 if (!ip_val->seen)
443                         ip_val->seen = 1;
444                 dscp = ip_val->dscp;
445         }
446 
447         if (dscp_lookup_class(&dscp, ingress, &class, true))
448                 return TC_ACT_UNSPEC;
449 
450         if (class) {
451                 if (check_flow(&class->config, skb, &dscp) &&
452                     dscp_lookup_class(&dscp, ingress, &class, false))
453                         return TC_ACT_UNSPEC;
454         }
455 
456         dscp &= GENMASK(5, 0);
457         dscp <<= 2;
458         force = !(dscp & QOSIFY_DSCP_FALLBACK_FLAG);
459 
460         if (type == bpf_htons(ETH_P_IP))
461                 ipv4_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
462         else if (type == bpf_htons(ETH_P_IPV6))
463                 ipv6_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
464 
465         return TC_ACT_UNSPEC;
466 }
467 
/* License string placed in the "license" ELF section of the BPF object. */
char _license[] SEC("license") = "GPL";
469 

This page was automatically generated by LXR 0.3.1.  •  OpenWrt