aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/xt_qtaguid_internal.h
blob: 6dc14a9c688966addc6ef949209fa70a70d00024 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/*
 * Kernel iptables module to track stats for packets based on user tags.
 *
 * (C) 2011 Google, Inc
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#ifndef __XT_QTAGUID_INTERNAL_H__
#define __XT_QTAGUID_INTERNAL_H__

#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/spinlock_types.h>
#include <linux/workqueue.h>

/*
 * Debug category bits.
 * Each bit selects one family of debug messages. The *_DEBUG() macros in
 * this header test their bit against qtaguid_debug_mask at run time
 * before calling pr_debug().
 */
/* Iface handling */
#define IDEBUG_MASK (1<<0)
/* Iptable Matching. Per packet. */
#define MDEBUG_MASK (1<<1)
/* Red-black tree handling. Per packet. */
#define RDEBUG_MASK (1<<2)
/* procfs ctrl/stats handling */
#define CDEBUG_MASK (1<<3)
/* dev and resource tracking */
#define DDEBUG_MASK (1<<4)

/*
 * Default debug mask; 0 selects no category.
 * To enable some categories use an OR of the bits above,
 * e.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK).
 * NOTE(review): presumably the initial value of qtaguid_debug_mask; its
 * definition is not in this header - confirm.
 */
#define DEFAULT_DEBUG_MASK 0

/*
 * Compile-time switches, one per debug category.
 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
 */
#define IDEBUG
#define MDEBUG
#define RDEBUG
#define CDEBUG
#define DDEBUG

/*
 * Call pr_debug() with the given arguments only when at least one bit of
 * @mask is set in qtaguid_debug_mask. The test is hinted as unlikely(), and
 * the body is wrapped in do { } while (0) so that the macro can be used
 * as a single statement.
 */
#define MSK_DEBUG(mask, ...) do {                           \
		if (unlikely(qtaguid_debug_mask & (mask)))  \
			pr_debug(__VA_ARGS__);              \
	} while (0)
/*
 * Per-category wrappers.
 * When the matching *DEBUG switch is defined, the wrapper goes through
 * MSK_DEBUG() with that category's mask bit; otherwise it expands to
 * no_printk() with the same arguments.
 */
/* Iface handling messages (IDEBUG_MASK). */
#ifdef IDEBUG
#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
#else
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* Iptables matching messages (MDEBUG_MASK). */
#ifdef MDEBUG
#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
#else
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* Red-black tree handling messages (RDEBUG_MASK). */
#ifdef RDEBUG
#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
#else
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* procfs ctrl/stats handling messages (CDEBUG_MASK). */
#ifdef CDEBUG
#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
#else
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* dev and resource tracking messages (DDEBUG_MASK). */
#ifdef DDEBUG
#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
#else
#define DR_DEBUG(...) no_printk(__VA_ARGS__)
#endif

/*
 * Run-time bitmask of enabled debug categories (*DEBUG_MASK bits), read by
 * MSK_DEBUG(). Only declared here; the definition lives outside this header.
 */
extern uint qtaguid_debug_mask;

/*---------------------------------------------------------------------------*/
/*
 * Tags:
 *
 * They represent what the data usage counters will be tracked against.
 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and can not be ignored.
 * So a tag will always at least represent a UID (uid_tag).
 *
 * A tag can be augmented with an "accounting tag" which is associated
 * with a UID.
 * User space can set the acct_tag portion of the tag which is then used
 * with sockets: all data belonging to that socket will be counted against the
 * tag. The policing is then based on the tag's uid_tag portion,
 * and stats are collected for the acct_tag portion separately.
 *
 * There could be
 * a:  {acct_tag=1, uid_tag=10003}
 * b:  {acct_tag=2, uid_tag=10003}
 * c:  {acct_tag=3, uid_tag=10003}
 * d:  {acct_tag=0, uid_tag=10003}
 * a, b, and c represent tags associated with specific sockets.
 * d is for the totals for that uid, including all untagged traffic.
 * Typically d is used with policing/quota rules.
 *
 * We want tag_t big enough to distinguish uid_t and acct_tag.
 * It might become a struct if needed.
 * Nothing should be using it as an int.
 */
/* Opaque tag value: manipulate it only through the accessors that follow. */
typedef uint64_t tag_t;

/* The low 32 bits carry the uid_tag, the high 32 bits carry the acct_tag. */
#define TAG_UID_MASK 0xFFFFFFFFULL
#define TAG_ACCT_MASK (~TAG_UID_MASK)

/* Three-way ordering of two tags: -1 if t1 < t2, 0 if equal, 1 if t1 > t2. */
static inline int tag_compare(tag_t t1, tag_t t2)
{
	if (t1 < t2)
		return -1;
	if (t1 == t2)
		return 0;
	return 1;
}

/* Build a full tag by OR-ing an acct_tag with a uid. */
static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
{
	tag_t uid_part = uid;

	return uid_part | acct_tag;
}

/* Build a tag that carries only the uid (no acct_tag bits set). */
static inline tag_t make_tag_from_uid(uid_t uid)
{
	tag_t uid_only = uid;

	return uid_only;
}

/* Extract the uid stored in the low 32 bits of the tag, as a uid_t. */
static inline uid_t get_uid_from_tag(tag_t tag)
{
	tag_t uid_bits = tag & TAG_UID_MASK;

	return uid_bits;
}

/* Return the tag with its acct_tag bits cleared (uid_tag portion only). */
static inline tag_t get_utag_from_tag(tag_t tag)
{
	tag_t uid_bits = tag & TAG_UID_MASK;

	return uid_bits;
}

/* Return the tag with its uid bits cleared (acct_tag portion only). */
static inline tag_t get_atag_from_tag(tag_t tag)
{
	tag_t acct_bits = tag & TAG_ACCT_MASK;

	return acct_bits;
}

/* True when the tag has no uid bits set, i.e. it is a pure acct_tag. */
static inline bool valid_atag(tag_t tag)
{
	return (tag & TAG_UID_MASK) == 0;
}

/* Place a 32-bit value in the acct_tag (high 32 bits) position. */
static inline tag_t make_atag_from_value(uint32_t value)
{
	tag_t atag = value;

	return atag << 32;
}
/*---------------------------------------------------------------------------*/

/*
 * Maximum number of socket tags that a UID is allowed to have active.
 * Multiple processes belonging to the same UID contribute towards this limit.
 * Special UIDs that can impersonate a UID also contribute (e.g. download
 * manager, ...)
 * NOTE(review): the DEFAULT_ prefix suggests this only seeds a limit that
 * can be changed elsewhere; no such tunable is visible in this header -
 * confirm against the .c file.
 */
#define DEFAULT_MAX_SOCK_TAGS 1024

/*
 * For now we only track 2 sets of counters.
 * The default set is 0.
 * Userspace can activate another set for a given uid being tracked.
 */
#define IFS_MAX_COUNTER_SETS 2

/*
 * Traffic direction.
 * IFS_MAX_DIRECTIONS is not a direction; it is the number of directions and
 * is used to size per-direction arrays.
 */
enum ifs_tx_rx {
	IFS_TX,
	IFS_RX,
	IFS_MAX_DIRECTIONS
};

/*
 * For now, TCP, UDP, the rest.
 * IFS_MAX_PROTOS is not a protocol; it is the number of protocol buckets
 * and is used to size per-protocol arrays.
 */
enum ifs_proto {
	IFS_TCP,
	IFS_UDP,
	IFS_PROTO_OTHER,
	IFS_MAX_PROTOS
};

/* One byte count plus one packet count. */
struct byte_packet_counters {
	uint64_t bytes;
	uint64_t packets;
};

/* Byte/packet counters indexed by [counter set][direction][protocol]. */
struct data_counters {
	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};

/*
 * Sum the byte counts of every protocol bucket for one counter set and
 * direction.
 *
 * @counters:  counters to read; never written to.
 * @set:       counter set index; the caller must keep it below
 *             IFS_MAX_COUNTER_SETS (not checked here).
 * @direction: IFS_TX or IFS_RX.
 *
 * Iterates up to IFS_MAX_PROTOS so that a protocol added to enum ifs_proto
 * is included in the total automatically.
 */
static inline uint64_t dc_sum_bytes(const struct data_counters *counters,
				    int set,
				    enum ifs_tx_rx direction)
{
	uint64_t total = 0;
	int proto;

	for (proto = 0; proto < IFS_MAX_PROTOS; proto++)
		total += counters->bpc[set][direction][proto].bytes;

	return total;
}

/*
 * Sum the packet counts of every protocol bucket for one counter set and
 * direction.
 *
 * @counters:  counters to read; never written to.
 * @set:       counter set index; the caller must keep it below
 *             IFS_MAX_COUNTER_SETS (not checked here).
 * @direction: IFS_TX or IFS_RX.
 *
 * Iterates up to IFS_MAX_PROTOS so that a protocol added to enum ifs_proto
 * is included in the total automatically.
 */
static inline uint64_t dc_sum_packets(const struct data_counters *counters,
				      int set,
				      enum ifs_tx_rx direction)
{
	uint64_t total = 0;
	int proto;

	for (proto = 0; proto < IFS_MAX_PROTOS; proto++)
		total += counters->bpc[set][direction][proto].packets;

	return total;
}


/*
 * Generic tag-carrying node used as a base for rb_tree ops.
 * It is embedded as the first member ("tn") of struct tag_stat,
 * struct tag_counter_set and struct tag_ref.
 */
struct tag_node {
	struct rb_node node;	/* rb-tree linkage */
	/* NOTE(review): presumably the sort key, ordered via tag_compare() */
	tag_t tag;
};

/*
 * Statistics kept for one tag.
 * NOTE(review): presumably these nodes live in iface_stat.tag_stat_tree,
 * i.e. one tag_stat per tag per interface - confirm against the .c file.
 */
struct tag_stat {
	struct tag_node tn;		/* rb-tree node carrying the tag */
	struct data_counters counters;	/* counters for tn.tag itself */
	/*
	 * If this tag is acct_tag based, we need to count against the
	 * matching parent uid_tag.
	 */
	struct data_counters *parent_counters;
};

/* Accounting state kept for one network interface. */
struct iface_stat {
	struct list_head list;  /* in iface_stat_list */
	char *ifname;
	bool active;
	/* net_dev is only valid for active iface_stat */
	struct net_device *net_dev;

	/* Per-direction totals; the name indicates they come from the device */
	struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
	/* Full set/direction/protocol totals; the name indicates skb-based */
	struct data_counters totals_via_skb;
	/*
	 * We keep the last_known, because some devices reset their counters
	 * just before NETDEV_UP, while some will reset just before
	 * NETDEV_REGISTER (which is more normal).
	 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
	 * its current dev stats smaller than what was previously known, we
	 * assume an UNREGISTER and just use the last_known.
	 */
	struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
	/* last_known is usable when last_known_valid is true */
	bool last_known_valid;

	struct proc_dir_entry *proc_ptr;

	/* NOTE(review): presumably a tree of struct tag_stat - confirm */
	struct rb_root tag_stat_tree;
	/* NOTE(review): presumably protects tag_stat_tree - confirm */
	spinlock_t tag_stat_list_lock;
};

/*
 * This is needed to create proc_dir_entries from atomic context.
 * It pairs a work item with the iface_stat it concerns.
 */
struct iface_stat_work {
	struct work_struct iface_work;		/* the work item itself */
	struct iface_stat *iface_entry;		/* interface the work is for */
};

/*
 * Track tag that this socket is transferring data for, and not necessarily
 * the uid that owns the socket.
 * This is the tag against which tag_stat.counters will be billed.
 * These structs need to be looked up by sock and pid.
 */
struct sock_tag {
	struct rb_node sock_node;	/* rb-tree linkage */
	struct sock *sk;  /* Only used as a number, never dereferenced */
	/* The socket is needed for sockfd_put() */
	struct socket *socket;
	/* Used to associate with a given pid */
	struct list_head list;   /* in proc_qtu_data.sock_tag_list */
	pid_t pid;

	tag_t tag;	/* the tag this socket's traffic is billed against */
};

/*
 * Event counters for the module: command/event counts plus a breakdown of
 * how the match function located (or failed to locate) a socket for each
 * packet. Every field is an atomic64_t.
 */
struct qtaguid_event_counts {
	/* Various successful events */
	atomic64_t sockets_tagged;
	atomic64_t sockets_untagged;
	atomic64_t counter_set_changes;
	atomic64_t delete_cmds;
	atomic64_t iface_events;  /* Number of NETDEV_* events handled */

	atomic64_t match_calls;   /* Number of times iptables called mt */
	/* Number of times iptables called mt from pre or post routing hooks */
	atomic64_t match_calls_prepost;
	/*
	 * match_found_sk_*: numbers related to the netfilter matching
	 * function finding a sock for the sk_buff.
	 * Total skbs processed is sum(match_found*).
	 */
	atomic64_t match_found_sk;   /* An sk was already in the sk_buff. */
	/* The connection tracker had or didn't have the sk. */
	atomic64_t match_found_sk_in_ct;
	atomic64_t match_found_no_sk_in_ct;
	/*
	 * No sk could be found. No apparent owner. Could happen with
	 * unsolicited traffic.
	 */
	atomic64_t match_no_sk;
	/*
	 * The file ptr in the sk_socket wasn't there.
	 * This might happen for traffic while the socket is being closed.
	 */
	atomic64_t match_no_sk_file;
};

/*
 * Records which counter set is currently active for the given tag.
 * NOTE(review): active_set is presumably an index below
 * IFS_MAX_COUNTER_SETS into data_counters.bpc[] - confirm.
 */
struct tag_counter_set {
	struct tag_node tn;	/* rb-tree node carrying the tag */
	int active_set;		/* the counter set in use for tn.tag */
};

/*----------------------------------------------*/
/*
 * The qtu uid data is used to track resources that are created directly or
 * indirectly by processes (uid tracked).
 * It is shared by the processes with the same uid.
 * Some of the resources will be counted to prevent further rogue allocations,
 * some will need freeing once the owner process (uid) exits.
 */
struct uid_tag_data {
	struct rb_node node;	/* rb-tree linkage */
	uid_t uid;		/* the uid this entry tracks */

	/*
	 * For the uid, how many accounting tags have been set.
	 */
	int num_active_tags;
	/* Track the number of proc_qtu_data that reference it */
	int num_pqd;
	/* NOTE(review): presumably a tree of struct tag_ref - confirm */
	struct rb_root tag_ref_tree;
	/* No tag_node_tree_lock; use uid_tag_data_tree_lock */
};

/*
 * Reference record for one tag, counting the sockets currently tagged
 * with it.
 * NOTE(review): presumably stored in uid_tag_data.tag_ref_tree - confirm.
 */
struct tag_ref {
	struct tag_node tn;	/* rb-tree node carrying the tag */

	/*
	 * This tracks the number of active sockets that have a tag on them
	 * which matches this tag_ref.tn.tag.
	 * A tag ref can live on after the sockets are untagged.
	 * A tag ref can only be removed during a tag delete command.
	 */
	int num_sock_tags;
};

/*
 * Tracking data for one process (pid).
 * It points at the uid_tag_data for the process's uid and anchors the list
 * of sock_tags to free when the process dies.
 */
struct proc_qtu_data {
	struct rb_node node;	/* rb-tree linkage */
	pid_t pid;		/* the process this entry tracks */

	/* The uid data this process refers to (counted in its num_pqd) */
	struct uid_tag_data *parent_tag_data;

	/* Tracks the sock_tags that need freeing upon this proc's death */
	struct list_head sock_tag_list;
	/* No spinlock_t sock_tag_list_lock; use the global one. */
};

/*----------------------------------------------*/
#endif  /* ifndef __XT_QTAGUID_INTERNAL_H__ */