Created November 23, 2011 06:52
patch for mobile data limitation
--- ./frameworks/base/packages/SettingsProvider/src/com/android/providers/settings/DatabaseHelper.java~ 2011-11-15 08:04:40.000000000 +0800
+++ ./frameworks/base/packages/SettingsProvider/src/com/android/providers/settings/DatabaseHelper.java 2011-11-23 11:26:00.781355800 +0800
@@ -1472,6 +1472,9 @@
loadBooleanSetting(stmt, Settings.Secure.TOUCH_EXPLORATION_ENABLED,
R.bool.def_touch_exploration_enabled);
+
+ loadBooleanSetting(stmt, Settings.Secure.NETSTATS_ENABLED,
+ R.bool.def_netstats_enabled);
} finally {
if (stmt != null) stmt.close();
}
--- ./frameworks/base/packages/SettingsProvider/res/values/defaults.xml~ 2011-11-15 08:04:40.000000000 +0800
+++ ./frameworks/base/packages/SettingsProvider/res/values/defaults.xml 2011-11-23 11:27:29.125356141 +0800
@@ -133,4 +133,7 @@
<bool name="def_dtmf_tones_enabled">true</bool>
<!-- Default for UI touch sounds enabled -->
<bool name="def_sound_effects_enabled">true</bool>
+
+ <!-- Default for Mobile Data Usage Limitation enabled -->
+ <bool name="def_netstats_enabled">true</bool>
</resources>
diff --git a/arch/arm/configs/herring_defconfig b/arch/arm/configs/herring_defconfig
index 201d9c7..ea4490f 100755
--- a/arch/arm/configs/herring_defconfig
+++ b/arch/arm/configs/herring_defconfig
@@ -82,6 +82,7 @@ CONFIG_NF_CONNTRACK_SANE=y
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CONNTRACK_TFTP=y
CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_TPROXY=y
CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
CONFIG_NETFILTER_XT_TARGET_LED=y
@@ -99,10 +100,13 @@ CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MATCH_LIMIT=y
CONFIG_NETFILTER_XT_MATCH_MAC=y
CONFIG_NETFILTER_XT_MATCH_MARK=y
-CONFIG_NETFILTER_XT_MATCH_OWNER=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
CONFIG_NETFILTER_XT_MATCH_STRING=y
@@ -121,9 +125,17 @@ CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_IP_NF_TARGET_NETMAP=y
CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_MANGLE=y
CONFIG_IP_NF_ARPTABLES=y
CONFIG_IP_NF_ARPFILTER=y
CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_TARGET_LOG=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_TARGET_REJECT_SKERR=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCH_INGRESS=y
diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h
index 7f16a14..0f904b3 100644
--- a/include/linux/android_aid.h
+++ b/include/linux/android_aid.h
@@ -22,5 +22,7 @@
#define AID_INET 3003
#define AID_NET_RAW 3004
#define AID_NET_ADMIN 3005
+#define AID_NET_BW_STATS 3006 /* read bandwidth statistics */
+#define AID_NET_BW_ACCT 3007 /* change bandwidth statistics accounting */
#endif
diff --git a/include/linux/netfilter/xt_qtaguid.h b/include/linux/netfilter/xt_qtaguid.h
new file mode 100644
index 0000000..ca60fbd
--- /dev/null
+++ b/include/linux/netfilter/xt_qtaguid.h
@@ -0,0 +1,13 @@
+#ifndef _XT_QTAGUID_MATCH_H
+#define _XT_QTAGUID_MATCH_H
+
+/* For now we just replace the xt_owner.
+ * FIXME: make iptables aware of qtaguid. */
+#include <linux/netfilter/xt_owner.h>
+
+#define XT_QTAGUID_UID XT_OWNER_UID
+#define XT_QTAGUID_GID XT_OWNER_GID
+#define XT_QTAGUID_SOCKET XT_OWNER_SOCKET
+#define xt_qtaguid_match_info xt_owner_match_info
+
+#endif /* _XT_QTAGUID_MATCH_H */
diff --git a/include/linux/netfilter/xt_quota2.h b/include/linux/netfilter/xt_quota2.h
new file mode 100644
index 0000000..eadc690
--- /dev/null
+++ b/include/linux/netfilter/xt_quota2.h
@@ -0,0 +1,25 @@
+#ifndef _XT_QUOTA_H
+#define _XT_QUOTA_H
+
+enum xt_quota_flags {
+ XT_QUOTA_INVERT = 1 << 0,
+ XT_QUOTA_GROW = 1 << 1,
+ XT_QUOTA_PACKET = 1 << 2,
+ XT_QUOTA_NO_CHANGE = 1 << 3,
+ XT_QUOTA_MASK = 0x0F,
+};
+
+struct xt_quota_counter;
+
+struct xt_quota_mtinfo2 {
+ char name[15];
+ u_int8_t flags;
+
+ /* Comparison-invariant */
+ aligned_u64 quota;
+
+ /* Used internally by the kernel */
+ struct xt_quota_counter *master __attribute__((aligned(8)));
+};
+
+#endif /* _XT_QUOTA_H */
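
Unlike the original quota match, quota2 keeps one named, shared byte counter (the name[15] field above) that is counted correctly rather than per CPU. In the xtables-addons version this is based on, the counters are also exposed under procfs; a minimal userspace sketch, assuming the /proc/net/xt_quota/<name> path (an assumption, check your tree):

#include <stdio.h>
#include <inttypes.h>

/* Sketch: read the bytes left in a named quota2 counter.
 * The /proc/net/xt_quota/<name> path comes from the xtables-addons
 * quota2 module this patch is based on (assumption, not verified here).
 * Writing a number back to the same file resets the counter. */
static int quota2_remaining(const char *name, uint64_t *bytes)
{
	char path[64];
	FILE *f;
	int ok;

	snprintf(path, sizeof(path), "/proc/net/xt_quota/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	ok = (fscanf(f, "%" SCNu64, bytes) == 1);
	fclose(f);
	return ok ? 0 : -1;
}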
diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h
index 6f475b8..6c1979a 100644
--- a/include/linux/netfilter/xt_socket.h
+++ b/include/linux/netfilter/xt_socket.h
@@ -9,4 +9,9 @@ struct xt_socket_mtinfo1 {
__u8 flags;
};
+void xt_socket_put_sk(struct sock *sk);
+struct sock *xt_socket_get4_sk(const struct sk_buff *skb,
+ struct xt_action_param *par);
+struct sock *xt_socket_get6_sk(const struct sk_buff *skb,
+ struct xt_action_param *par);
#endif /* _XT_SOCKET_H */
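
These new declarations export the socket-lookup helpers so other matches can map a packet back to its owning socket; xt_qtaguid below uses them to find the uid that owns the traffic. A rough sketch of the call pattern from another match module (only valid in the PRE_ROUTING/LOCAL_IN hooks, as the qtaguid code notes):

/* Sketch of a caller inside another netfilter match module, e.g. with
 * #include <linux/netfilter/x_tables.h> and <linux/netfilter/xt_socket.h>.
 * Resolve the socket behind an IPv4 skb, read the owning fsuid, drop the ref. */
static uid_t skb_owner_uid(const struct sk_buff *skb,
			   struct xt_action_param *par)
{
	struct sock *sk = xt_socket_get4_sk(skb, par);
	uid_t uid = (uid_t)-1;

	if (!sk)
		return uid;
	/* time-wait minisockets have no sk_socket; just drop the reference */
	if (sk->sk_state != TCP_TIME_WAIT &&
	    sk->sk_socket && sk->sk_socket->file)
		uid = sk->sk_socket->file->f_cred->fsuid;
	xt_socket_put_sk(sk);
	return uid;
}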
diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h | |
index 208b46f..e505358 100644 | |
--- a/include/net/netfilter/nf_tproxy_core.h | |
+++ b/include/net/netfilter/nf_tproxy_core.h | |
@@ -5,28 +5,204 @@ | |
#include <linux/in.h> | |
#include <linux/skbuff.h> | |
#include <net/sock.h> | |
-#include <net/inet_sock.h> | |
+#include <net/inet_hashtables.h> | |
+#include <net/inet6_hashtables.h> | |
#include <net/tcp.h> | |
+#define NFT_LOOKUP_ANY 0 | |
+#define NFT_LOOKUP_LISTENER 1 | |
+#define NFT_LOOKUP_ESTABLISHED 2 | |
+ | |
/* look up and get a reference to a matching socket */ | |
-extern struct sock * | |
+ | |
+ | |
+/* This function is used by the 'TPROXY' target and the 'socket' | |
+ * match. The following lookups are supported: | |
+ * | |
+ * Explicit TProxy target rule | |
+ * =========================== | |
+ * | |
+ * This is used when the user wants to intercept a connection matching | |
+ * an explicit iptables rule. In this case the sockets are assumed | |
+ * matching in preference order: | |
+ * | |
+ * - match: if there's a fully established connection matching the | |
+ * _packet_ tuple, it is returned, assuming the redirection | |
+ * already took place and we process a packet belonging to an | |
+ * established connection | |
+ * | |
+ * - match: if there's a listening socket matching the redirection | |
+ * (e.g. on-port & on-ip of the connection), it is returned, | |
+ * regardless if it was bound to 0.0.0.0 or an explicit | |
+ * address. The reasoning is that if there's an explicit rule, it | |
+ * does not really matter if the listener is bound to an interface | |
+ * or to 0. The user already stated that he wants redirection | |
+ * (since he added the rule). | |
+ * | |
+ * "socket" match based redirection (no specific rule) | |
+ * =================================================== | |
+ * | |
+ * There are connections with dynamic endpoints (e.g. FTP data | |
+ * connection) that the user is unable to add explicit rules | |
+ * for. These are taken care of by a generic "socket" rule. It is | |
+ * assumed that the proxy application is trusted to open such | |
+ * connections without explicit iptables rule (except of course the | |
+ * generic 'socket' rule). In this case the following sockets are | |
+ * matched in preference order: | |
+ * | |
+ * - match: if there's a fully established connection matching the | |
+ * _packet_ tuple | |
+ * | |
+ * - match: if there's a non-zero bound listener (possibly with a | |
+ * non-local address) We don't accept zero-bound listeners, since | |
+ * then local services could intercept traffic going through the | |
+ * box. | |
+ * | |
+ * Please note that there's an overlap between what a TPROXY target | |
+ * and a socket match will match. Normally if you have both rules the | |
+ * "socket" match will be the first one, effectively all packets | |
+ * belonging to established connections going through that one. | |
+ */ | |
+static inline struct sock * | |
nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, | |
const __be32 saddr, const __be32 daddr, | |
const __be16 sport, const __be16 dport, | |
- const struct net_device *in, bool listening); | |
+ const struct net_device *in, int lookup_type) | |
+{ | |
+ struct sock *sk; | |
+ | |
+ /* look up socket */ | |
+ switch (protocol) { | |
+ case IPPROTO_TCP: | |
+ switch (lookup_type) { | |
+ case NFT_LOOKUP_ANY: | |
+ sk = __inet_lookup(net, &tcp_hashinfo, | |
+ saddr, sport, daddr, dport, | |
+ in->ifindex); | |
+ break; | |
+ case NFT_LOOKUP_LISTENER: | |
+ sk = inet_lookup_listener(net, &tcp_hashinfo, | |
+ daddr, dport, | |
+ in->ifindex); | |
-static inline void | |
-nf_tproxy_put_sock(struct sock *sk) | |
+ /* NOTE: we return listeners even if bound to | |
+ * 0.0.0.0, those are filtered out in | |
+ * xt_socket, since xt_TPROXY needs 0 bound | |
+ * listeners too */ | |
+ | |
+ break; | |
+ case NFT_LOOKUP_ESTABLISHED: | |
+ sk = inet_lookup_established(net, &tcp_hashinfo, | |
+ saddr, sport, daddr, dport, | |
+ in->ifindex); | |
+ break; | |
+ default: | |
+ WARN_ON(1); | |
+ sk = NULL; | |
+ break; | |
+ } | |
+ break; | |
+ case IPPROTO_UDP: | |
+ sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, | |
+ in->ifindex); | |
+ if (sk && lookup_type != NFT_LOOKUP_ANY) { | |
+ int connected = (sk->sk_state == TCP_ESTABLISHED); | |
+ int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); | |
+ | |
+ /* NOTE: we return listeners even if bound to | |
+ * 0.0.0.0, those are filtered out in | |
+ * xt_socket, since xt_TPROXY needs 0 bound | |
+ * listeners too */ | |
+ if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || | |
+ (lookup_type == NFT_LOOKUP_LISTENER && connected)) { | |
+ sock_put(sk); | |
+ sk = NULL; | |
+ } | |
+ } | |
+ break; | |
+ default: | |
+ WARN_ON(1); | |
+ sk = NULL; | |
+ } | |
+ | |
+ pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", | |
+ protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); | |
+ | |
+ return sk; | |
+} | |
+ | |
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | |
+static inline struct sock * | |
+nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, | |
+ const struct in6_addr *saddr, const struct in6_addr *daddr, | |
+ const __be16 sport, const __be16 dport, | |
+ const struct net_device *in, int lookup_type) | |
{ | |
- /* TIME_WAIT inet sockets have to be handled differently */ | |
- if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT)) | |
- inet_twsk_put(inet_twsk(sk)); | |
- else | |
- sock_put(sk); | |
+ struct sock *sk; | |
+ | |
+ /* look up socket */ | |
+ switch (protocol) { | |
+ case IPPROTO_TCP: | |
+ switch (lookup_type) { | |
+ case NFT_LOOKUP_ANY: | |
+ sk = inet6_lookup(net, &tcp_hashinfo, | |
+ saddr, sport, daddr, dport, | |
+ in->ifindex); | |
+ break; | |
+ case NFT_LOOKUP_LISTENER: | |
+ sk = inet6_lookup_listener(net, &tcp_hashinfo, | |
+ daddr, ntohs(dport), | |
+ in->ifindex); | |
+ | |
+ /* NOTE: we return listeners even if bound to | |
+ * 0.0.0.0, those are filtered out in | |
+ * xt_socket, since xt_TPROXY needs 0 bound | |
+ * listeners too */ | |
+ | |
+ break; | |
+ case NFT_LOOKUP_ESTABLISHED: | |
+ sk = __inet6_lookup_established(net, &tcp_hashinfo, | |
+ saddr, sport, daddr, ntohs(dport), | |
+ in->ifindex); | |
+ break; | |
+ default: | |
+ WARN_ON(1); | |
+ sk = NULL; | |
+ break; | |
+ } | |
+ break; | |
+ case IPPROTO_UDP: | |
+ sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, | |
+ in->ifindex); | |
+ if (sk && lookup_type != NFT_LOOKUP_ANY) { | |
+ int connected = (sk->sk_state == TCP_ESTABLISHED); | |
+ int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); | |
+ | |
+ /* NOTE: we return listeners even if bound to | |
+ * 0.0.0.0, those are filtered out in | |
+ * xt_socket, since xt_TPROXY needs 0 bound | |
+ * listeners too */ | |
+ if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || | |
+ (lookup_type == NFT_LOOKUP_LISTENER && connected)) { | |
+ sock_put(sk); | |
+ sk = NULL; | |
+ } | |
+ } | |
+ break; | |
+ default: | |
+ WARN_ON(1); | |
+ sk = NULL; | |
+ } | |
+ | |
+ pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n", | |
+ protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk); | |
+ | |
+ return sk; | |
} | |
+#endif | |
/* assign a socket to the skb -- consumes sk */ | |
-int | |
+void | |
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); | |
#endif | |
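
The lookup helpers are now inline and take an explicit lookup type. Following the policy described in the comment above (established connection first, then a listener), a caller such as the TPROXY target or the socket match would do roughly this; a sketch only, since the real callers also handle TIME_WAIT sockets and the redirected address/port:

/* Sketch: resolve the socket for an incoming TCP packet in two steps,
 * mirroring the "established first, listener second" order documented above. */
static struct sock *lookup_for_redirect(struct net *net,
					const struct iphdr *iph,
					const struct tcphdr *tcph,
					const struct net_device *in)
{
	struct sock *sk;

	/* 1. a fully established connection matching the packet tuple */
	sk = nf_tproxy_get_sock_v4(net, iph->protocol,
				   iph->saddr, iph->daddr,
				   tcph->source, tcph->dest,
				   in, NFT_LOOKUP_ESTABLISHED);
	if (sk)
		return sk;

	/* 2. otherwise a listener on the (possibly redirected) address */
	return nf_tproxy_get_sock_v4(net, iph->protocol,
				     iph->saddr, iph->daddr,
				     tcph->source, tcph->dest,
				     in, NFT_LOOKUP_LISTENER);
}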
diff --git a/include/net/udp.h b/include/net/udp.h | |
index 4201dc8..df6e827 100644 | |
--- a/include/net/udp.h | |
+++ b/include/net/udp.h | |
@@ -185,6 +185,9 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |
extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, | |
__be32 daddr, __be16 dport, | |
int dif); | |
+extern struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, | |
+ const struct in6_addr *daddr, __be16 dport, | |
+ int dif); | |
/* | |
* SNMP statistics for UDP and UDP-Lite | |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c | |
index 97e1214..a70cefe 100644 | |
--- a/net/ipv6/udp.c | |
+++ b/net/ipv6/udp.c | |
@@ -320,6 +320,13 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, | |
udptable); | |
} | |
+struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, | |
+ const struct in6_addr *daddr, __be16 dport, int dif) | |
+{ | |
+ return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); | |
+} | |
+EXPORT_SYMBOL_GPL(udp6_lib_lookup); | |
+ | |
/* | |
* This should be easy, if there is something there we | |
* return it, otherwise we block. | |
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig | |
index 8593a77..3911fa4 100644 | |
--- a/net/netfilter/Kconfig | |
+++ b/net/netfilter/Kconfig | |
@@ -837,6 +837,22 @@ config NETFILTER_XT_MATCH_PKTTYPE | |
To compile it as a module, choose M here. If unsure, say N. | |
+config NETFILTER_XT_MATCH_QTAGUID | |
+ bool '"quota, tag, owner" match and stats support' | |
+ depends on NETFILTER_XT_MATCH_SOCKET | |
+ depends on NETFILTER_XT_MATCH_OWNER=n | |
+ help | |
+ This option replaces the `owner' match. In addition to matching | |
+ on uid, it keeps stats based on a tag assigned to a socket. | |
+ The full tag is comprised of a UID and an accounting tag. | |
+ The tags are assignable to sockets from user space (e.g. a download | |
+ manager can assign the socket to another UID for accounting). | |
+ Stats and control are done via /proc/net/xt_qtaguid/. | |
+ It replaces owner as it takes the same arguments, but should | |
+ really be recognized by the iptables tool. | |
+ | |
+ If unsure, say `N'. | |
+ | |
config NETFILTER_XT_MATCH_QUOTA | |
tristate '"quota" match support' | |
depends on NETFILTER_ADVANCED | |
@@ -847,6 +863,30 @@ config NETFILTER_XT_MATCH_QUOTA | |
If you want to compile it as a module, say M here and read | |
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'. | |
+config NETFILTER_XT_MATCH_QUOTA2 | |
+ tristate '"quota2" match support' | |
+ depends on NETFILTER_ADVANCED | |
+ help | |
+ This option adds a `quota2' match, which allows to match on a | |
+ byte counter correctly and not per CPU. | |
+ It allows naming the quotas. | |
+ This is based on http://xtables-addons.git.sourceforge.net | |
+ | |
+ If you want to compile it as a module, say M here and read | |
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. | |
+ | |
+config NETFILTER_XT_MATCH_QUOTA2_LOG | |
+ bool '"quota2" Netfilter LOG support' | |
+ depends on NETFILTER_XT_MATCH_QUOTA2 | |
+ depends on IP_NF_TARGET_ULOG=n # not yes, not module, just no | |
+ default n | |
+ help | |
+ This option allows `quota2' to log ONCE when a quota limit | |
+ is passed. It logs via NETLINK using the NETLINK_NFLOG family. | |
+ It logs similarly to how ipt_ULOG would without data. | |
+ | |
+ If unsure, say `N'. | |
+ | |
config NETFILTER_XT_MATCH_RATEEST | |
tristate '"rateest" match support' | |
depends on NETFILTER_ADVANCED | |
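
As the QTAGUID help text above says, stats and control live under /proc/net/xt_qtaguid/. A minimal userspace sketch of tagging a socket, assuming the `t <sock_fd> <acct_tag> <uid>` command format parsed by the ctrl file added further down (Android's framework uses the same mechanism; the caller must be root or in AID_NET_BW_ACCT):

#include <stdio.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>

/* Sketch: account this socket's traffic under (acct_tag, uid).
 * The command string format is an assumption based on the qtaguid ctrl parser. */
static int qtaguid_tag_socket(int sock_fd, uint64_t acct_tag, uid_t uid)
{
	FILE *ctrl = fopen("/proc/net/xt_qtaguid/ctrl", "w");
	int ret;

	if (!ctrl)
		return -1;
	/* the accounting tag occupies the upper 32 bits of the full tag */
	ret = fprintf(ctrl, "t %d %llu %u", sock_fd,
		      (unsigned long long)(acct_tag << 32), (unsigned)uid);
	fclose(ctrl);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0 || qtaguid_tag_socket(fd, 0x42, getuid()) < 0)
		perror("qtaguid tag");
	close(fd);
	return 0;
}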
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile | |
index 14e3a8f..8c1d523 100644 | |
--- a/net/netfilter/Makefile | |
+++ b/net/netfilter/Makefile | |
@@ -84,7 +84,9 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o | |
+obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o | |
+obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o | |
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c | |
index 5490fc3..474d621 100644 | |
--- a/net/netfilter/nf_tproxy_core.c | |
+++ b/net/netfilter/nf_tproxy_core.c | |
@@ -18,41 +18,6 @@ | |
#include <net/udp.h> | |
#include <net/netfilter/nf_tproxy_core.h> | |
-struct sock * | |
-nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, | |
- const __be32 saddr, const __be32 daddr, | |
- const __be16 sport, const __be16 dport, | |
- const struct net_device *in, bool listening_only) | |
-{ | |
- struct sock *sk; | |
- | |
- /* look up socket */ | |
- switch (protocol) { | |
- case IPPROTO_TCP: | |
- if (listening_only) | |
- sk = __inet_lookup_listener(net, &tcp_hashinfo, | |
- daddr, ntohs(dport), | |
- in->ifindex); | |
- else | |
- sk = __inet_lookup(net, &tcp_hashinfo, | |
- saddr, sport, daddr, dport, | |
- in->ifindex); | |
- break; | |
- case IPPROTO_UDP: | |
- sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, | |
- in->ifindex); | |
- break; | |
- default: | |
- WARN_ON(1); | |
- sk = NULL; | |
- } | |
- | |
- pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n", | |
- protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk); | |
- | |
- return sk; | |
-} | |
-EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); | |
static void | |
nf_tproxy_destructor(struct sk_buff *skb) | |
@@ -63,22 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb) | |
skb->destructor = NULL; | |
if (sk) | |
- nf_tproxy_put_sock(sk); | |
+ sock_put(sk); | |
} | |
/* consumes sk */ | |
-int | |
+void | |
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) | |
{ | |
- if (inet_sk(sk)->transparent) { | |
- skb_orphan(skb); | |
- skb->sk = sk; | |
- skb->destructor = nf_tproxy_destructor; | |
- return 1; | |
- } else | |
- nf_tproxy_put_sock(sk); | |
+ /* assigning tw sockets complicates things; most | |
+ * skb->sk->X checks would have to test sk->sk_state first */ | |
+ if (sk->sk_state == TCP_TIME_WAIT) { | |
+ inet_twsk_put(inet_twsk(sk)); | |
+ return; | |
+ } | |
- return 0; | |
+ skb_orphan(skb); | |
+ skb->sk = sk; | |
+ skb->destructor = nf_tproxy_destructor; | |
} | |
EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); | |
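
The xt_qtaguid.c module below also exports per-interface totals through /proc/net/xt_qtaguid/iface_stat_all; each line is produced by iface_stat_all_proc_read() as `ifname active rx_bytes rx_packets tx_bytes tx_packets` followed by the raw device counters. A small reader sketch, assuming that layout:

#include <stdio.h>
#include <inttypes.h>

/* Sketch: print qtaguid's per-interface totals.  Field order follows the
 * snprintf() in iface_stat_all_proc_read():
 *   ifname active rx_bytes rx_packets tx_bytes tx_packets
 *   dev_rx_bytes dev_rx_packets dev_tx_bytes dev_tx_packets */
int main(void)
{
	FILE *f = fopen("/proc/net/xt_qtaguid/iface_stat_all", "r");
	char ifname[32];
	int active;
	uint64_t rxb, rxp, txb, txp;
	unsigned long drxb, drxp, dtxb, dtxp;

	if (!f) {
		perror("iface_stat_all");
		return 1;
	}
	while (fscanf(f, "%31s %d %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
		      " %lu %lu %lu %lu",
		      ifname, &active, &rxb, &rxp, &txb, &txp,
		      &drxb, &drxp, &dtxb, &dtxp) == 10)
		printf("%s: rx=%" PRIu64 " tx=%" PRIu64 " bytes%s\n",
		       ifname, rxb, txb, active ? "" : " (inactive)");
	fclose(f);
	return 0;
}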
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c | |
new file mode 100644 | |
index 0000000..59962e8 | |
--- /dev/null | |
+++ b/net/netfilter/xt_qtaguid.c | |
@@ -0,0 +1,2793 @@ | |
+/* | |
+ * Kernel iptables module to track stats for packets based on user tags. | |
+ * | |
+ * (C) 2011 Google, Inc | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+ | |
+/* | |
+ * There are run-time debug flags enabled via the debug_mask module param, or | |
+ * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h. | |
+ */ | |
+#define DEBUG | |
+ | |
+#ifndef pr_fmt | |
+#define pr_fmt(fmt) fmt | |
+#endif | |
+ | |
+#define pr_warn_once(fmt, ...) \ | |
+ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) | |
+ | |
+#include <linux/file.h> | |
+#include <linux/inetdevice.h> | |
+#include <linux/module.h> | |
+#include <linux/netfilter/x_tables.h> | |
+#include <linux/netfilter/xt_qtaguid.h> | |
+#include <linux/skbuff.h> | |
+#include <linux/workqueue.h> | |
+#include <net/addrconf.h> | |
+#include <net/sock.h> | |
+#include <net/tcp.h> | |
+#include <net/udp.h> | |
+ | |
+#include <linux/netfilter/xt_socket.h> | |
+#include "xt_qtaguid_internal.h" | |
+#include "xt_qtaguid_print.h" | |
+ | |
+/* | |
+ * We only use the xt_socket funcs within a similar context to avoid unexpected | |
+ * return values. | |
+ */ | |
+#define XT_SOCKET_SUPPORTED_HOOKS \ | |
+ ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) | |
+ | |
+ | |
+static const char *module_procdirname = "xt_qtaguid"; | |
+static struct proc_dir_entry *xt_qtaguid_procdir; | |
+ | |
+static unsigned int proc_iface_perms = S_IRUGO; | |
+module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR); | |
+ | |
+static struct proc_dir_entry *xt_qtaguid_stats_file; | |
+static unsigned int proc_stats_perms = S_IRUGO; | |
+module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); | |
+ | |
+static struct proc_dir_entry *xt_qtaguid_ctrl_file; | |
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK | |
+static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; | |
+#else | |
+static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; | |
+#endif | |
+module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); | |
+ | |
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK | |
+#include <linux/android_aid.h> | |
+static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; | |
+static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; | |
+#else | |
+/* 0 means, don't limit anybody */ | |
+static gid_t proc_stats_readall_gid; | |
+static gid_t proc_ctrl_write_gid; | |
+#endif | |
+module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, | |
+ S_IRUGO | S_IWUSR); | |
+module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, | |
+ S_IRUGO | S_IWUSR); | |
+ | |
+/* | |
+ * Limit the number of active tags (via socket tags) for a given UID. | |
+ * Multiple processes could share the UID. | |
+ */ | |
+static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS; | |
+module_param(max_sock_tags, int, S_IRUGO | S_IWUSR); | |
+ | |
+/* | |
+ * After the kernel has initiallized this module, it is still possible | |
+ * to make it passive. | |
+ * Setting passive to Y: | |
+ * - the iface stats handling will not act on notifications. | |
+ * - iptables matches will never match. | |
+ * - ctrl commands silently succeed. | |
+ * - stats are always empty. | |
+ * This is mostly usefull when a bug is suspected. | |
+ */ | |
+static bool module_passive; | |
+module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR); | |
+ | |
+/* | |
+ * Control how qtaguid data is tracked per proc/uid. | |
+ * Setting tag_tracking_passive to Y: | |
+ * - don't create proc specific structs to track tags | |
+ * - don't check that active tag stats exceed some limits. | |
+ * - don't clean up socket tags on process exits. | |
+ * This is mostly usefull when a bug is suspected. | |
+ */ | |
+static bool qtu_proc_handling_passive; | |
+module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool, | |
+ S_IRUGO | S_IWUSR); | |
+ | |
+#define QTU_DEV_NAME "xt_qtaguid" | |
+ | |
+uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK; | |
+module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); | |
+ | |
+/*---------------------------------------------------------------------------*/ | |
+static const char *iface_stat_procdirname = "iface_stat"; | |
+static struct proc_dir_entry *iface_stat_procdir; | |
+static const char *iface_stat_all_procfilename = "iface_stat_all"; | |
+static struct proc_dir_entry *iface_stat_all_procfile; | |
+ | |
+/* | |
+ * Ordering of locks: | |
+ * outer locks: | |
+ * iface_stat_list_lock | |
+ * sock_tag_list_lock | |
+ * inner locks: | |
+ * uid_tag_data_tree_lock | |
+ * tag_counter_set_list_lock | |
+ * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock | |
+ * is acquired. | |
+ * | |
+ * Call tree with all lock holders as of 2011-09-25: | |
+ * | |
+ * iface_stat_all_proc_read() | |
+ * iface_stat_list_lock | |
+ * (struct iface_stat) | |
+ * | |
+ * qtaguid_ctrl_proc_read() | |
+ * sock_tag_list_lock | |
+ * (sock_tag_tree) | |
+ * (struct proc_qtu_data->sock_tag_list) | |
+ * prdebug_full_state() | |
+ * sock_tag_list_lock | |
+ * (sock_tag_tree) | |
+ * uid_tag_data_tree_lock | |
+ * (uid_tag_data_tree) | |
+ * (proc_qtu_data_tree) | |
+ * iface_stat_list_lock | |
+ * | |
+ * qtaguid_stats_proc_read() | |
+ * iface_stat_list_lock | |
+ * struct iface_stat->tag_stat_list_lock | |
+ * | |
+ * qtudev_open() | |
+ * uid_tag_data_tree_lock | |
+ * | |
+ * qtudev_release() | |
+ * sock_tag_data_list_lock | |
+ * uid_tag_data_tree_lock | |
+ * prdebug_full_state() | |
+ * sock_tag_list_lock | |
+ * uid_tag_data_tree_lock | |
+ * iface_stat_list_lock | |
+ * | |
+ * iface_netdev_event_handler() | |
+ * iface_stat_create() | |
+ * iface_stat_list_lock | |
+ * iface_stat_update() | |
+ * iface_stat_list_lock | |
+ * | |
+ * iface_inetaddr_event_handler() | |
+ * iface_stat_create() | |
+ * iface_stat_list_lock | |
+ * iface_stat_update() | |
+ * iface_stat_list_lock | |
+ * | |
+ * iface_inet6addr_event_handler() | |
+ * iface_stat_create_ipv6() | |
+ * iface_stat_list_lock | |
+ * iface_stat_update() | |
+ * iface_stat_list_lock | |
+ * | |
+ * qtaguid_mt() | |
+ * account_for_uid() | |
+ * if_tag_stat_update() | |
+ * get_sock_stat() | |
+ * sock_tag_list_lock | |
+ * struct iface_stat->tag_stat_list_lock | |
+ * tag_stat_update() | |
+ * get_active_counter_set() | |
+ * tag_counter_set_list_lock | |
+ * tag_stat_update() | |
+ * get_active_counter_set() | |
+ * tag_counter_set_list_lock | |
+ * | |
+ * | |
+ * qtaguid_ctrl_parse() | |
+ * ctrl_cmd_delete() | |
+ * sock_tag_list_lock | |
+ * tag_counter_set_list_lock | |
+ * iface_stat_list_lock | |
+ * struct iface_stat->tag_stat_list_lock | |
+ * uid_tag_data_tree_lock | |
+ * ctrl_cmd_counter_set() | |
+ * tag_counter_set_list_lock | |
+ * ctrl_cmd_tag() | |
+ * sock_tag_list_lock | |
+ * (sock_tag_tree) | |
+ * get_tag_ref() | |
+ * uid_tag_data_tree_lock | |
+ * (uid_tag_data_tree) | |
+ * uid_tag_data_tree_lock | |
+ * (proc_qtu_data_tree) | |
+ * ctrl_cmd_untag() | |
+ * sock_tag_list_lock | |
+ * uid_tag_data_tree_lock | |
+ * | |
+ */ | |
+static LIST_HEAD(iface_stat_list); | |
+static DEFINE_SPINLOCK(iface_stat_list_lock); | |
+ | |
+static struct rb_root sock_tag_tree = RB_ROOT; | |
+static DEFINE_SPINLOCK(sock_tag_list_lock); | |
+ | |
+static struct rb_root tag_counter_set_tree = RB_ROOT; | |
+static DEFINE_SPINLOCK(tag_counter_set_list_lock); | |
+ | |
+static struct rb_root uid_tag_data_tree = RB_ROOT; | |
+static DEFINE_SPINLOCK(uid_tag_data_tree_lock); | |
+ | |
+static struct rb_root proc_qtu_data_tree = RB_ROOT; | |
+/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ | |
+ | |
+static struct qtaguid_event_counts qtu_events; | |
+/*----------------------------------------------*/ | |
+static bool can_manipulate_uids(void) | |
+{ | |
+ /* root pwnd */ | |
+ return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) | |
+ || in_egroup_p(proc_ctrl_write_gid); | |
+} | |
+ | |
+static bool can_impersonate_uid(uid_t uid) | |
+{ | |
+ return uid == current_fsuid() || can_manipulate_uids(); | |
+} | |
+ | |
+static bool can_read_other_uid_stats(uid_t uid) | |
+{ | |
+ /* root pwnd */ | |
+ return unlikely(!current_fsuid()) || uid == current_fsuid() | |
+ || unlikely(!proc_stats_readall_gid) | |
+ || in_egroup_p(proc_stats_readall_gid); | |
+} | |
+ | |
+static inline void dc_add_byte_packets(struct data_counters *counters, int set, | |
+ enum ifs_tx_rx direction, | |
+ enum ifs_proto ifs_proto, | |
+ int bytes, | |
+ int packets) | |
+{ | |
+ counters->bpc[set][direction][ifs_proto].bytes += bytes; | |
+ counters->bpc[set][direction][ifs_proto].packets += packets; | |
+} | |
+ | |
+static inline uint64_t dc_sum_bytes(struct data_counters *counters, | |
+ int set, | |
+ enum ifs_tx_rx direction) | |
+{ | |
+ return counters->bpc[set][direction][IFS_TCP].bytes | |
+ + counters->bpc[set][direction][IFS_UDP].bytes | |
+ + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; | |
+} | |
+ | |
+static inline uint64_t dc_sum_packets(struct data_counters *counters, | |
+ int set, | |
+ enum ifs_tx_rx direction) | |
+{ | |
+ return counters->bpc[set][direction][IFS_TCP].packets | |
+ + counters->bpc[set][direction][IFS_UDP].packets | |
+ + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; | |
+} | |
+ | |
+static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) | |
+{ | |
+ struct rb_node *node = root->rb_node; | |
+ | |
+ while (node) { | |
+ struct tag_node *data = rb_entry(node, struct tag_node, node); | |
+ int result; | |
+ RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " | |
+ " node=%p data=%p\n", tag, node, data); | |
+ result = tag_compare(tag, data->tag); | |
+ RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " | |
+ " data.tag=0x%llx (uid=%u) res=%d\n", | |
+ tag, data->tag, get_uid_from_tag(data->tag), result); | |
+ if (result < 0) | |
+ node = node->rb_left; | |
+ else if (result > 0) | |
+ node = node->rb_right; | |
+ else | |
+ return data; | |
+ } | |
+ return NULL; | |
+} | |
+ | |
+static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) | |
+{ | |
+ struct rb_node **new = &(root->rb_node), *parent = NULL; | |
+ | |
+ /* Figure out where to put new node */ | |
+ while (*new) { | |
+ struct tag_node *this = rb_entry(*new, struct tag_node, | |
+ node); | |
+ int result = tag_compare(data->tag, this->tag); | |
+ RB_DEBUG("qtaguid: %s(): tag=0x%llx" | |
+ " (uid=%u)\n", __func__, | |
+ this->tag, | |
+ get_uid_from_tag(this->tag)); | |
+ parent = *new; | |
+ if (result < 0) | |
+ new = &((*new)->rb_left); | |
+ else if (result > 0) | |
+ new = &((*new)->rb_right); | |
+ else | |
+ BUG(); | |
+ } | |
+ | |
+ /* Add new node and rebalance tree. */ | |
+ rb_link_node(&data->node, parent, new); | |
+ rb_insert_color(&data->node, root); | |
+} | |
+ | |
+static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) | |
+{ | |
+ tag_node_tree_insert(&data->tn, root); | |
+} | |
+ | |
+static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) | |
+{ | |
+ struct tag_node *node = tag_node_tree_search(root, tag); | |
+ if (!node) | |
+ return NULL; | |
+ return rb_entry(&node->node, struct tag_stat, tn.node); | |
+} | |
+ | |
+static void tag_counter_set_tree_insert(struct tag_counter_set *data, | |
+ struct rb_root *root) | |
+{ | |
+ tag_node_tree_insert(&data->tn, root); | |
+} | |
+ | |
+static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, | |
+ tag_t tag) | |
+{ | |
+ struct tag_node *node = tag_node_tree_search(root, tag); | |
+ if (!node) | |
+ return NULL; | |
+ return rb_entry(&node->node, struct tag_counter_set, tn.node); | |
+ | |
+} | |
+ | |
+static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) | |
+{ | |
+ tag_node_tree_insert(&data->tn, root); | |
+} | |
+ | |
+static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) | |
+{ | |
+ struct tag_node *node = tag_node_tree_search(root, tag); | |
+ if (!node) | |
+ return NULL; | |
+ return rb_entry(&node->node, struct tag_ref, tn.node); | |
+} | |
+ | |
+static struct sock_tag *sock_tag_tree_search(struct rb_root *root, | |
+ const struct sock *sk) | |
+{ | |
+ struct rb_node *node = root->rb_node; | |
+ | |
+ while (node) { | |
+ struct sock_tag *data = rb_entry(node, struct sock_tag, | |
+ sock_node); | |
+ if (sk < data->sk) | |
+ node = node->rb_left; | |
+ else if (sk > data->sk) | |
+ node = node->rb_right; | |
+ else | |
+ return data; | |
+ } | |
+ return NULL; | |
+} | |
+ | |
+static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) | |
+{ | |
+ struct rb_node **new = &(root->rb_node), *parent = NULL; | |
+ | |
+ /* Figure out where to put new node */ | |
+ while (*new) { | |
+ struct sock_tag *this = rb_entry(*new, struct sock_tag, | |
+ sock_node); | |
+ parent = *new; | |
+ if (data->sk < this->sk) | |
+ new = &((*new)->rb_left); | |
+ else if (data->sk > this->sk) | |
+ new = &((*new)->rb_right); | |
+ else | |
+ BUG(); | |
+ } | |
+ | |
+ /* Add new node and rebalance tree. */ | |
+ rb_link_node(&data->sock_node, parent, new); | |
+ rb_insert_color(&data->sock_node, root); | |
+} | |
+ | |
+static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) | |
+{ | |
+ struct rb_node *node; | |
+ struct sock_tag *st_entry; | |
+ | |
+ node = rb_first(st_to_free_tree); | |
+ while (node) { | |
+ st_entry = rb_entry(node, struct sock_tag, sock_node); | |
+ node = rb_next(node); | |
+ CT_DEBUG("qtaguid: %s(): " | |
+ "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, | |
+ st_entry->sk, | |
+ st_entry->tag, | |
+ get_uid_from_tag(st_entry->tag)); | |
+ rb_erase(&st_entry->sock_node, st_to_free_tree); | |
+ sockfd_put(st_entry->socket); | |
+ kfree(st_entry); | |
+ } | |
+} | |
+ | |
+static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, | |
+ const pid_t pid) | |
+{ | |
+ struct rb_node *node = root->rb_node; | |
+ | |
+ while (node) { | |
+ struct proc_qtu_data *data = rb_entry(node, | |
+ struct proc_qtu_data, | |
+ node); | |
+ if (pid < data->pid) | |
+ node = node->rb_left; | |
+ else if (pid > data->pid) | |
+ node = node->rb_right; | |
+ else | |
+ return data; | |
+ } | |
+ return NULL; | |
+} | |
+ | |
+static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, | |
+ struct rb_root *root) | |
+{ | |
+ struct rb_node **new = &(root->rb_node), *parent = NULL; | |
+ | |
+ /* Figure out where to put new node */ | |
+ while (*new) { | |
+ struct proc_qtu_data *this = rb_entry(*new, | |
+ struct proc_qtu_data, | |
+ node); | |
+ parent = *new; | |
+ if (data->pid < this->pid) | |
+ new = &((*new)->rb_left); | |
+ else if (data->pid > this->pid) | |
+ new = &((*new)->rb_right); | |
+ else | |
+ BUG(); | |
+ } | |
+ | |
+ /* Add new node and rebalance tree. */ | |
+ rb_link_node(&data->node, parent, new); | |
+ rb_insert_color(&data->node, root); | |
+} | |
+ | |
+static void uid_tag_data_tree_insert(struct uid_tag_data *data, | |
+ struct rb_root *root) | |
+{ | |
+ struct rb_node **new = &(root->rb_node), *parent = NULL; | |
+ | |
+ /* Figure out where to put new node */ | |
+ while (*new) { | |
+ struct uid_tag_data *this = rb_entry(*new, | |
+ struct uid_tag_data, | |
+ node); | |
+ parent = *new; | |
+ if (data->uid < this->uid) | |
+ new = &((*new)->rb_left); | |
+ else if (data->uid > this->uid) | |
+ new = &((*new)->rb_right); | |
+ else | |
+ BUG(); | |
+ } | |
+ | |
+ /* Add new node and rebalance tree. */ | |
+ rb_link_node(&data->node, parent, new); | |
+ rb_insert_color(&data->node, root); | |
+} | |
+ | |
+static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, | |
+ uid_t uid) | |
+{ | |
+ struct rb_node *node = root->rb_node; | |
+ | |
+ while (node) { | |
+ struct uid_tag_data *data = rb_entry(node, | |
+ struct uid_tag_data, | |
+ node); | |
+ if (uid < data->uid) | |
+ node = node->rb_left; | |
+ else if (uid > data->uid) | |
+ node = node->rb_right; | |
+ else | |
+ return data; | |
+ } | |
+ return NULL; | |
+} | |
+ | |
+/* | |
+ * Allocates a new uid_tag_data struct if needed. | |
+ * Returns a pointer to the found or allocated uid_tag_data. | |
+ * Returns a PTR_ERR on failures, and lock is not held. | |
+ * If found is not NULL: | |
+ * sets *found to true if not allocated. | |
+ * sets *found to false if allocated. | |
+ */ | |
+struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) | |
+{ | |
+ struct uid_tag_data *utd_entry; | |
+ | |
+ /* Look for top level uid_tag_data for the UID */ | |
+ utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); | |
+ DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); | |
+ | |
+ if (found_res) | |
+ *found_res = utd_entry; | |
+ if (utd_entry) | |
+ return utd_entry; | |
+ | |
+ utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); | |
+ if (!utd_entry) { | |
+ pr_err("qtaguid: get_uid_data(%u): " | |
+ "tag data alloc failed\n", uid); | |
+ return ERR_PTR(-ENOMEM); | |
+ } | |
+ | |
+ utd_entry->uid = uid; | |
+ utd_entry->tag_ref_tree = RB_ROOT; | |
+ uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); | |
+ DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); | |
+ return utd_entry; | |
+} | |
+ | |
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */ | |
+static struct tag_ref *new_tag_ref(tag_t new_tag, | |
+ struct uid_tag_data *utd_entry) | |
+{ | |
+ struct tag_ref *tr_entry; | |
+ int res; | |
+ | |
+ if (utd_entry->num_active_tags + 1 > max_sock_tags) { | |
+ pr_info("qtaguid: new_tag_ref(0x%llx): " | |
+ "tag ref alloc quota exceeded. max=%d\n", | |
+ new_tag, max_sock_tags); | |
+ res = -EMFILE; | |
+ goto err_res; | |
+ | |
+ } | |
+ | |
+ tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); | |
+ if (!tr_entry) { | |
+ pr_err("qtaguid: new_tag_ref(0x%llx): " | |
+ "tag ref alloc failed\n", | |
+ new_tag); | |
+ res = -ENOMEM; | |
+ goto err_res; | |
+ } | |
+ tr_entry->tn.tag = new_tag; | |
+ /* tr_entry->num_sock_tags handled by caller */ | |
+ utd_entry->num_active_tags++; | |
+ tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); | |
+ DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " | |
+ " inserted new tag ref %p\n", | |
+ new_tag, tr_entry); | |
+ return tr_entry; | |
+ | |
+err_res: | |
+ return ERR_PTR(res); | |
+} | |
+ | |
+static struct tag_ref *lookup_tag_ref(tag_t full_tag, | |
+ struct uid_tag_data **utd_res) | |
+{ | |
+ struct uid_tag_data *utd_entry; | |
+ struct tag_ref *tr_entry; | |
+ bool found_utd; | |
+ uid_t uid = get_uid_from_tag(full_tag); | |
+ | |
+ DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", | |
+ full_tag, uid); | |
+ | |
+ utd_entry = get_uid_data(uid, &found_utd); | |
+ if (IS_ERR_OR_NULL(utd_entry)) { | |
+ if (utd_res) | |
+ *utd_res = utd_entry; | |
+ return NULL; | |
+ } | |
+ | |
+ tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); | |
+ if (utd_res) | |
+ *utd_res = utd_entry; | |
+ DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", | |
+ full_tag, utd_entry, tr_entry); | |
+ return tr_entry; | |
+} | |
+ | |
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */ | |
+static struct tag_ref *get_tag_ref(tag_t full_tag, | |
+ struct uid_tag_data **utd_res) | |
+{ | |
+ struct uid_tag_data *utd_entry; | |
+ struct tag_ref *tr_entry; | |
+ | |
+ DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", | |
+ full_tag); | |
+ spin_lock_bh(&uid_tag_data_tree_lock); | |
+ tr_entry = lookup_tag_ref(full_tag, &utd_entry); | |
+ BUG_ON(IS_ERR_OR_NULL(utd_entry)); | |
+ if (!tr_entry) | |
+ tr_entry = new_tag_ref(full_tag, utd_entry); | |
+ | |
+ spin_unlock_bh(&uid_tag_data_tree_lock); | |
+ if (utd_res) | |
+ *utd_res = utd_entry; | |
+ DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", | |
+ full_tag, utd_entry, tr_entry); | |
+ return tr_entry; | |
+} | |
+ | |
+/* Checks and maybe frees the UID Tag Data entry */ | |
+static void put_utd_entry(struct uid_tag_data *utd_entry) | |
+{ | |
+ /* Are we done with the UID tag data entry? */ | |
+ if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && | |
+ !utd_entry->num_pqd) { | |
+ DR_DEBUG("qtaguid: %s(): " | |
+ "erase utd_entry=%p uid=%u " | |
+ "by pid=%u tgid=%u uid=%u\n", __func__, | |
+ utd_entry, utd_entry->uid, | |
+ current->pid, current->tgid, current_fsuid()); | |
+ BUG_ON(utd_entry->num_active_tags); | |
+ rb_erase(&utd_entry->node, &uid_tag_data_tree); | |
+ kfree(utd_entry); | |
+ } else { | |
+ DR_DEBUG("qtaguid: %s(): " | |
+ "utd_entry=%p still has %d tags %d proc_qtu_data\n", | |
+ __func__, utd_entry, utd_entry->num_active_tags, | |
+ utd_entry->num_pqd); | |
+ BUG_ON(!(utd_entry->num_active_tags || | |
+ utd_entry->num_pqd)); | |
+ } | |
+} | |
+ | |
+/* | |
+ * If no sock_tags are using this tag_ref, | |
+ * decrements refcount of utd_entry, removes tr_entry | |
+ * from utd_entry->tag_ref_tree and frees. | |
+ */ | |
+static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, | |
+ struct uid_tag_data *utd_entry) | |
+{ | |
+ DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, | |
+ tr_entry, tr_entry->tn.tag, | |
+ get_uid_from_tag(tr_entry->tn.tag)); | |
+ if (!tr_entry->num_sock_tags) { | |
+ BUG_ON(!utd_entry->num_active_tags); | |
+ utd_entry->num_active_tags--; | |
+ rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); | |
+ DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); | |
+ kfree(tr_entry); | |
+ } | |
+} | |
+ | |
+static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) | |
+{ | |
+ struct rb_node *node; | |
+ struct tag_ref *tr_entry; | |
+ tag_t acct_tag; | |
+ | |
+ DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, | |
+ full_tag, get_uid_from_tag(full_tag)); | |
+ acct_tag = get_atag_from_tag(full_tag); | |
+ node = rb_first(&utd_entry->tag_ref_tree); | |
+ while (node) { | |
+ tr_entry = rb_entry(node, struct tag_ref, tn.node); | |
+ node = rb_next(node); | |
+ if (!acct_tag || tr_entry->tn.tag == full_tag) | |
+ free_tag_ref_from_utd_entry(tr_entry, utd_entry); | |
+ } | |
+} | |
+ | |
+static int read_proc_u64(char *page, char **start, off_t off, | |
+ int count, int *eof, void *data) | |
+{ | |
+ int len; | |
+ uint64_t value; | |
+ char *p = page; | |
+ uint64_t *iface_entry = data; | |
+ | |
+ if (!data) | |
+ return 0; | |
+ | |
+ value = *iface_entry; | |
+ p += sprintf(p, "%llu\n", value); | |
+ len = (p - page) - off; | |
+ *eof = (len <= count) ? 1 : 0; | |
+ *start = page + off; | |
+ return len; | |
+} | |
+ | |
+static int read_proc_bool(char *page, char **start, off_t off, | |
+ int count, int *eof, void *data) | |
+{ | |
+ int len; | |
+ bool value; | |
+ char *p = page; | |
+ bool *bool_entry = data; | |
+ | |
+ if (!data) | |
+ return 0; | |
+ | |
+ value = *bool_entry; | |
+ p += sprintf(p, "%u\n", value); | |
+ len = (p - page) - off; | |
+ *eof = (len <= count) ? 1 : 0; | |
+ *start = page + off; | |
+ return len; | |
+} | |
+ | |
+static int get_active_counter_set(tag_t tag) | |
+{ | |
+ int active_set = 0; | |
+ struct tag_counter_set *tcs; | |
+ | |
+ MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" | |
+ " (uid=%u)\n", | |
+ tag, get_uid_from_tag(tag)); | |
+ /* For now we only handle UID tags for active sets */ | |
+ tag = get_utag_from_tag(tag); | |
+ spin_lock_bh(&tag_counter_set_list_lock); | |
+ tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | |
+ if (tcs) | |
+ active_set = tcs->active_set; | |
+ spin_unlock_bh(&tag_counter_set_list_lock); | |
+ return active_set; | |
+} | |
+ | |
+/* | |
+ * Find the entry for tracking the specified interface. | |
+ * Caller must hold iface_stat_list_lock | |
+ */ | |
+static struct iface_stat *get_iface_entry(const char *ifname) | |
+{ | |
+ struct iface_stat *iface_entry; | |
+ | |
+ /* Find the entry for tracking the specified tag within the interface */ | |
+ if (ifname == NULL) { | |
+ pr_info("qtaguid: iface_stat: get() NULL device name\n"); | |
+ return NULL; | |
+ } | |
+ | |
+ /* Iterate over interfaces */ | |
+ list_for_each_entry(iface_entry, &iface_stat_list, list) { | |
+ if (!strcmp(ifname, iface_entry->ifname)) | |
+ goto done; | |
+ } | |
+ iface_entry = NULL; | |
+done: | |
+ return iface_entry; | |
+} | |
+ | |
+static int iface_stat_all_proc_read(char *page, char **num_items_returned, | |
+ off_t items_to_skip, int char_count, | |
+ int *eof, void *data) | |
+{ | |
+ char *outp = page; | |
+ int item_index = 0; | |
+ int len; | |
+ struct iface_stat *iface_entry; | |
+ const struct net_device_stats *stats; | |
+ const struct net_device_stats no_dev_stats = {0}; | |
+ | |
+ if (unlikely(module_passive)) { | |
+ *eof = 1; | |
+ return 0; | |
+ } | |
+ | |
+ CT_DEBUG("qtaguid:proc iface_stat_all " | |
+ "page=%p *num_items_returned=%p off=%ld " | |
+ "char_count=%d *eof=%d\n", page, *num_items_returned, | |
+ items_to_skip, char_count, *eof); | |
+ | |
+ if (*eof) | |
+ return 0; | |
+ | |
+ /* | |
+ * This lock will prevent iface_stat_update() from changing active, | |
+ * and in turn prevent an interface from unregistering itself. | |
+ */ | |
+ spin_lock_bh(&iface_stat_list_lock); | |
+ list_for_each_entry(iface_entry, &iface_stat_list, list) { | |
+ if (item_index++ < items_to_skip) | |
+ continue; | |
+ | |
+ if (iface_entry->active) { | |
+ stats = dev_get_stats(iface_entry->net_dev); | |
+ } else { | |
+ stats = &no_dev_stats; | |
+ } | |
+ len = snprintf(outp, char_count, | |
+ "%s %d " | |
+ "%llu %llu %llu %llu " | |
+ "%lu %lu %lu %lu\n", | |
+ iface_entry->ifname, | |
+ iface_entry->active, | |
+ iface_entry->totals[IFS_RX].bytes, | |
+ iface_entry->totals[IFS_RX].packets, | |
+ iface_entry->totals[IFS_TX].bytes, | |
+ iface_entry->totals[IFS_TX].packets, | |
+ stats->rx_bytes, stats->rx_packets, | |
+ stats->tx_bytes, stats->tx_packets); | |
+ if (len >= char_count) { | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ *outp = '\0'; | |
+ return outp - page; | |
+ } | |
+ outp += len; | |
+ char_count -= len; | |
+ (*num_items_returned)++; | |
+ } | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ | |
+ *eof = 1; | |
+ return outp - page; | |
+} | |
+ | |
+static void iface_create_proc_worker(struct work_struct *work) | |
+{ | |
+ struct proc_dir_entry *proc_entry; | |
+ struct iface_stat_work *isw = container_of(work, struct iface_stat_work, | |
+ iface_work); | |
+ struct iface_stat *new_iface = isw->iface_entry; | |
+ | |
+ /* iface_entries are not deleted, so safe to manipulate. */ | |
+ proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); | |
+ if (IS_ERR_OR_NULL(proc_entry)) { | |
+ pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); | |
+ kfree(isw); | |
+ return; | |
+ } | |
+ | |
+ new_iface->proc_ptr = proc_entry; | |
+ | |
+ create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, | |
+ read_proc_u64, &new_iface->totals[IFS_TX].bytes); | |
+ create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, | |
+ read_proc_u64, &new_iface->totals[IFS_RX].bytes); | |
+ create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, | |
+ read_proc_u64, &new_iface->totals[IFS_TX].packets); | |
+ create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, | |
+ read_proc_u64, &new_iface->totals[IFS_RX].packets); | |
+ create_proc_read_entry("active", proc_iface_perms, proc_entry, | |
+ read_proc_bool, &new_iface->active); | |
+ | |
+ IF_DEBUG("qtaguid: iface_stat: create_proc(): done " | |
+ "entry=%p dev=%s\n", new_iface, new_iface->ifname); | |
+ kfree(isw); | |
+} | |
+ | |
+/* | |
+ * Will set the entry's active state, and | |
+ * update the net_dev accordingly also. | |
+ */ | |
+static void _iface_stat_set_active(struct iface_stat *entry, | |
+ struct net_device *net_dev, | |
+ bool activate) | |
+{ | |
+ if (activate) { | |
+ entry->net_dev = net_dev; | |
+ entry->active = true; | |
+/* | |
+ IF_DEBUG("qtaguid: %s(%s): " | |
+ "enable tracking. rfcnt=%d\n", __func__, | |
+ entry->ifname, | |
+ percpu_read(*net_dev->pcpu_refcnt)); | |
+*/ | |
+ } else { | |
+ entry->active = false; | |
+ entry->net_dev = NULL; | |
+/* | |
+ IF_DEBUG("qtaguid: %s(%s): " | |
+ "disable tracking. rfcnt=%d\n", __func__, | |
+ entry->ifname, | |
+ percpu_read(*net_dev->pcpu_refcnt)); | |
+*/ | |
+ } | |
+} | |
+ | |
+/* Caller must hold iface_stat_list_lock */ | |
+static struct iface_stat *iface_alloc(struct net_device *net_dev) | |
+{ | |
+ struct iface_stat *new_iface; | |
+ struct iface_stat_work *isw; | |
+ | |
+ new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); | |
+ if (new_iface == NULL) { | |
+ pr_err("qtaguid: iface_stat: create(%s): " | |
+ "iface_stat alloc failed\n", net_dev->name); | |
+ return NULL; | |
+ } | |
+ new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); | |
+ if (new_iface->ifname == NULL) { | |
+ pr_err("qtaguid: iface_stat: create(%s): " | |
+ "ifname alloc failed\n", net_dev->name); | |
+ kfree(new_iface); | |
+ return NULL; | |
+ } | |
+ spin_lock_init(&new_iface->tag_stat_list_lock); | |
+ new_iface->tag_stat_tree = RB_ROOT; | |
+ _iface_stat_set_active(new_iface, net_dev, true); | |
+ | |
+ /* | |
+ * ipv6 notifier chains are atomic :( | |
+ * No create_proc_read_entry() for you! | |
+ */ | |
+ isw = kmalloc(sizeof(*isw), GFP_ATOMIC); | |
+ if (!isw) { | |
+ pr_err("qtaguid: iface_stat: create(%s): " | |
+ "work alloc failed\n", new_iface->ifname); | |
+ _iface_stat_set_active(new_iface, net_dev, false); | |
+ kfree(new_iface->ifname); | |
+ kfree(new_iface); | |
+ return NULL; | |
+ } | |
+ isw->iface_entry = new_iface; | |
+ INIT_WORK(&isw->iface_work, iface_create_proc_worker); | |
+ schedule_work(&isw->iface_work); | |
+ list_add(&new_iface->list, &iface_stat_list); | |
+ return new_iface; | |
+} | |
+ | |
+static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, | |
+ struct iface_stat *iface) | |
+{ | |
+ const struct net_device_stats *stats; | |
+ bool stats_rewound; | |
+ | |
+ stats = dev_get_stats(net_dev); | |
+ /* No empty packets */ | |
+ stats_rewound = | |
+ (stats->rx_bytes < iface->last_known[IFS_RX].bytes) | |
+ || (stats->tx_bytes < iface->last_known[IFS_TX].bytes); | |
+ | |
+ IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " | |
+ "bytes rx/tx=%lu/%lu " | |
+ "active=%d last_known=%d " | |
+ "stats_rewound=%d\n", __func__, | |
+ net_dev ? net_dev->name : "?", | |
+ iface, net_dev, | |
+ stats->rx_bytes, stats->tx_bytes, | |
+ iface->active, iface->last_known_valid, stats_rewound); | |
+ | |
+ if (iface->active && iface->last_known_valid && stats_rewound) { | |
+ pr_warn_once("qtaguid: iface_stat: %s(%s): " | |
+ "iface reset its stats unexpectedly\n", __func__, | |
+ net_dev->name); | |
+ iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; | |
+ iface->totals[IFS_TX].packets += | |
+ iface->last_known[IFS_TX].packets; | |
+ iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; | |
+ iface->totals[IFS_RX].packets += | |
+ iface->last_known[IFS_RX].packets; | |
+ iface->last_known_valid = false; | |
+ IF_DEBUG("qtaguid: %s(%s): iface=%p " | |
+ "used last known bytes rx/tx=%llu/%llu\n", __func__, | |
+ iface->ifname, iface, iface->last_known[IFS_RX].bytes, | |
+ iface->last_known[IFS_TX].bytes); | |
+ } | |
+} | |
+ | |
+/* | |
+ * Create a new entry for tracking the specified interface. | |
+ * Do nothing if the entry already exists. | |
+ * Called when an interface is configured with a valid IP address. | |
+ */ | |
+static void iface_stat_create(struct net_device *net_dev, | |
+ struct in_ifaddr *ifa) | |
+{ | |
+ struct in_device *in_dev = NULL; | |
+ const char *ifname; | |
+ struct iface_stat *entry; | |
+ __be32 ipaddr = 0; | |
+ struct iface_stat *new_iface; | |
+ | |
+ IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", | |
+ net_dev ? net_dev->name : "?", | |
+ ifa, net_dev); | |
+ if (!net_dev) { | |
+ pr_err("qtaguid: iface_stat: create(): no net dev\n"); | |
+ return; | |
+ } | |
+ | |
+ ifname = net_dev->name; | |
+ if (!ifa) { | |
+ in_dev = in_dev_get(net_dev); | |
+ if (!in_dev) { | |
+ pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", | |
+ ifname); | |
+ return; | |
+ } | |
+ IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", | |
+ ifname, in_dev); | |
+ for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { | |
+ IF_DEBUG("qtaguid: iface_stat: create(%s): " | |
+ "ifa=%p ifa_label=%s\n", | |
+ ifname, ifa, | |
+ ifa->ifa_label ? ifa->ifa_label : "(null)"); | |
+ if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) | |
+ break; | |
+ } | |
+ } | |
+ | |
+ if (!ifa) { | |
+ IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", | |
+ ifname); | |
+ goto done_put; | |
+ } | |
+ ipaddr = ifa->ifa_local; | |
+ | |
+ spin_lock_bh(&iface_stat_list_lock); | |
+ entry = get_iface_entry(ifname); | |
+ if (entry != NULL) { | |
+ bool activate = !ipv4_is_loopback(ipaddr); | |
+ IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", | |
+ ifname, entry); | |
+ iface_check_stats_reset_and_adjust(net_dev, entry); | |
+ _iface_stat_set_active(entry, net_dev, activate); | |
+ IF_DEBUG("qtaguid: %s(%s): " | |
+ "tracking now %d on ip=%pI4\n", __func__, | |
+ entry->ifname, activate, &ipaddr); | |
+ goto done_unlock_put; | |
+ } else if (ipv4_is_loopback(ipaddr)) { | |
+ IF_DEBUG("qtaguid: iface_stat: create(%s): " | |
+ "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); | |
+ goto done_unlock_put; | |
+ } | |
+ | |
+ new_iface = iface_alloc(net_dev); | |
+ IF_DEBUG("qtaguid: iface_stat: create(%s): done " | |
+ "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); | |
+done_unlock_put: | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+done_put: | |
+ if (in_dev) | |
+ in_dev_put(in_dev); | |
+} | |
+ | |
+static void iface_stat_create_ipv6(struct net_device *net_dev, | |
+ struct inet6_ifaddr *ifa) | |
+{ | |
+ struct in_device *in_dev; | |
+ const char *ifname; | |
+ struct iface_stat *entry; | |
+ struct iface_stat *new_iface; | |
+ int addr_type; | |
+ | |
+ IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", | |
+ ifa, net_dev, net_dev ? net_dev->name : ""); | |
+ if (!net_dev) { | |
+ pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); | |
+ return; | |
+ } | |
+ ifname = net_dev->name; | |
+ | |
+ in_dev = in_dev_get(net_dev); | |
+ if (!in_dev) { | |
+ pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", | |
+ ifname); | |
+ return; | |
+ } | |
+ | |
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", | |
+ ifname, in_dev); | |
+ | |
+ if (!ifa) { | |
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", | |
+ ifname); | |
+ goto done_put; | |
+ } | |
+ addr_type = ipv6_addr_type(&ifa->addr); | |
+ | |
+ spin_lock_bh(&iface_stat_list_lock); | |
+ entry = get_iface_entry(ifname); | |
+ if (entry != NULL) { | |
+ bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); | |
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | |
+ ifname, entry); | |
+ iface_check_stats_reset_and_adjust(net_dev, entry); | |
+ _iface_stat_set_active(entry, net_dev, activate); | |
+ IF_DEBUG("qtaguid: %s(%s): " | |
+ "tracking now %d on ip=%pI6c\n", __func__, | |
+ entry->ifname, activate, &ifa->addr); | |
+ goto done_unlock_put; | |
+ } else if (addr_type & IPV6_ADDR_LOOPBACK) { | |
+ IF_DEBUG("qtaguid: %s(%s): " | |
+ "ignore loopback dev. ip=%pI6c\n", __func__, | |
+ ifname, &ifa->addr); | |
+ goto done_unlock_put; | |
+ } | |
+ | |
+ new_iface = iface_alloc(net_dev); | |
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): done " | |
+ "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); | |
+ | |
+done_unlock_put: | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+done_put: | |
+ in_dev_put(in_dev); | |
+} | |
+ | |
+static struct sock_tag *get_sock_stat_nl(const struct sock *sk) | |
+{ | |
+ MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); | |
+ return sock_tag_tree_search(&sock_tag_tree, sk); | |
+} | |
+ | |
+static struct sock_tag *get_sock_stat(const struct sock *sk) | |
+{ | |
+ struct sock_tag *sock_tag_entry; | |
+ MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); | |
+ if (!sk) | |
+ return NULL; | |
+ spin_lock_bh(&sock_tag_list_lock); | |
+ sock_tag_entry = get_sock_stat_nl(sk); | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ return sock_tag_entry; | |
+} | |
+ | |
+static void | |
+data_counters_update(struct data_counters *dc, int set, | |
+ enum ifs_tx_rx direction, int proto, int bytes) | |
+{ | |
+ switch (proto) { | |
+ case IPPROTO_TCP: | |
+ dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); | |
+ break; | |
+ case IPPROTO_UDP: | |
+ dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); | |
+ break; | |
+ case IPPROTO_IP: | |
+ default: | |
+ dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, | |
+ 1); | |
+ break; | |
+ } | |
+} | |
+ | |
+/* | |
+ * Update stats for the specified interface. Do nothing if the entry | |
+ * does not exist (when a device was never configured with an IP address). | |
+ * Called when a device goes down or is being unregistered. | |
+ */ | |
+static void iface_stat_update(struct net_device *net_dev, bool stash_only) | |
+{ | |
+ const struct net_device_stats *stats; | |
+ struct iface_stat *entry; | |
+ | |
+ stats = dev_get_stats(net_dev); | |
+ spin_lock_bh(&iface_stat_list_lock); | |
+ entry = get_iface_entry(net_dev->name); | |
+ if (entry == NULL) { | |
+ IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", | |
+ net_dev->name); | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ return; | |
+ } | |
+ | |
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | |
+ net_dev->name, entry); | |
+ if (!entry->active) { | |
+ IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, | |
+ net_dev->name); | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ return; | |
+ } | |
+ | |
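+	/* | |
+	 * stash_only is set on NETDEV_DOWN: remember the current dev stats | |
+	 * as last_known so a later counter reset can be detected and adjusted. | |
+	 */ | |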
+ if (stash_only) { | |
+ entry->last_known[IFS_TX].bytes = stats->tx_bytes; | |
+ entry->last_known[IFS_TX].packets = stats->tx_packets; | |
+ entry->last_known[IFS_RX].bytes = stats->rx_bytes; | |
+ entry->last_known[IFS_RX].packets = stats->rx_packets; | |
+ entry->last_known_valid = true; | |
+ IF_DEBUG("qtaguid: %s(%s): " | |
+ "dev stats stashed rx/tx=%lu/%lu\n", __func__, | |
+ net_dev->name, stats->rx_bytes, stats->tx_bytes); | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ return; | |
+ } | |
+ entry->totals[IFS_TX].bytes += stats->tx_bytes; | |
+ entry->totals[IFS_TX].packets += stats->tx_packets; | |
+ entry->totals[IFS_RX].bytes += stats->rx_bytes; | |
+ entry->totals[IFS_RX].packets += stats->rx_packets; | |
+ /* We don't need the last_known[] anymore */ | |
+ entry->last_known_valid = false; | |
+ _iface_stat_set_active(entry, net_dev, false); | |
+ IF_DEBUG("qtaguid: %s(%s): " | |
+ "disable tracking. rx/tx=%lu/%lu\n", __func__, | |
+ net_dev->name, stats->rx_bytes, stats->tx_bytes); | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+} | |
+ | |
+static void tag_stat_update(struct tag_stat *tag_entry, | |
+ enum ifs_tx_rx direction, int proto, int bytes) | |
+{ | |
+ int active_set; | |
+ active_set = get_active_counter_set(tag_entry->tn.tag); | |
+ MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " | |
+ "dir=%d proto=%d bytes=%d)\n", | |
+ tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), | |
+ active_set, direction, proto, bytes); | |
+ data_counters_update(&tag_entry->counters, active_set, direction, | |
+ proto, bytes); | |
+ if (tag_entry->parent_counters) | |
+ data_counters_update(tag_entry->parent_counters, active_set, | |
+ direction, proto, bytes); | |
+} | |
+ | |
+/* | |
+ * Create a new entry for tracking the specified {acct_tag,uid_tag} within | |
+ * the interface. | |
+ * iface_entry->tag_stat_list_lock should be held. | |
+ */ | |
+static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, | |
+ tag_t tag) | |
+{ | |
+ struct tag_stat *new_tag_stat_entry = NULL; | |
+ IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" | |
+ " (uid=%u)\n", __func__, | |
+ iface_entry, tag, get_uid_from_tag(tag)); | |
+ new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); | |
+ if (!new_tag_stat_entry) { | |
+ pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); | |
+ goto done; | |
+ } | |
+ new_tag_stat_entry->tn.tag = tag; | |
+ tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); | |
+done: | |
+ return new_tag_stat_entry; | |
+} | |
+ | |
+static void if_tag_stat_update(const char *ifname, uid_t uid, | |
+ const struct sock *sk, enum ifs_tx_rx direction, | |
+ int proto, int bytes) | |
+{ | |
+ struct tag_stat *tag_stat_entry; | |
+ tag_t tag, acct_tag; | |
+ tag_t uid_tag; | |
+ struct data_counters *uid_tag_counters; | |
+ struct sock_tag *sock_tag_entry; | |
+ struct iface_stat *iface_entry; | |
+ struct tag_stat *new_tag_stat; | |
+ MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " | |
+ "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", | |
+ ifname, uid, sk, direction, proto, bytes); | |
+ | |
+ | |
+ iface_entry = get_iface_entry(ifname); | |
+ if (!iface_entry) { | |
+ pr_err("qtaguid: iface_stat: stat_update() %s not found\n", | |
+ ifname); | |
+ return; | |
+ } | |
+ /* It is ok to process data when an iface_entry is inactive */ | |
+ | |
+ MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", | |
+ ifname, iface_entry); | |
+ | |
+ /* | |
+ * Look for a tagged sock. | |
+ * It will have an acct_uid. | |
+ */ | |
+ sock_tag_entry = get_sock_stat(sk); | |
+ if (sock_tag_entry) { | |
+ tag = sock_tag_entry->tag; | |
+ acct_tag = get_atag_from_tag(tag); | |
+ uid_tag = get_utag_from_tag(tag); | |
+ } else { | |
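+		/* Untagged socket: account against the plain uid (acct_tag 0). */ | |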
+ acct_tag = make_atag_from_value(0); | |
+ tag = combine_atag_with_uid(acct_tag, uid); | |
+ uid_tag = make_tag_from_uid(uid); | |
+ } | |
+ MT_DEBUG("qtaguid: iface_stat: stat_update(): " | |
+ " looking for tag=0x%llx (uid=%u) in ife=%p\n", | |
+ tag, get_uid_from_tag(tag), iface_entry); | |
+ /* Loop over tag list under this interface for {acct_tag,uid_tag} */ | |
+ spin_lock_bh(&iface_entry->tag_stat_list_lock); | |
+ | |
+ tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, | |
+ tag); | |
+ if (tag_stat_entry) { | |
+ /* | |
+ * Updating the {acct_tag, uid_tag} entry handles both stats: | |
+ * {0, uid_tag} will also get updated. | |
+ */ | |
+ tag_stat_update(tag_stat_entry, direction, proto, bytes); | |
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock); | |
+ return; | |
+ } | |
+ | |
+ /* Loop over tag list under this interface for {0,uid_tag} */ | |
+ tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, | |
+ uid_tag); | |
+ if (!tag_stat_entry) { | |
+ /* Here: the base uid_tag did not exist */ | |
+ /* | |
+ * No parent counters. So | |
+		 * - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats. | |
+ */ | |
+ new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); | |
+ uid_tag_counters = &new_tag_stat->counters; | |
+ } else { | |
+ uid_tag_counters = &tag_stat_entry->counters; | |
+ } | |
+ | |
+ if (acct_tag) { | |
+ new_tag_stat = create_if_tag_stat(iface_entry, tag); | |
+ new_tag_stat->parent_counters = uid_tag_counters; | |
+ } | |
+ tag_stat_update(new_tag_stat, direction, proto, bytes); | |
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock); | |
+} | |
+ | |
+static int iface_netdev_event_handler(struct notifier_block *nb, | |
+ unsigned long event, void *ptr) { | |
+ struct net_device *dev = ptr; | |
+ | |
+ if (unlikely(module_passive)) | |
+ return NOTIFY_DONE; | |
+ | |
+ IF_DEBUG("qtaguid: iface_stat: netdev_event(): " | |
+ "ev=0x%lx/%s netdev=%p->name=%s\n", | |
+ event, netdev_evt_str(event), dev, dev ? dev->name : ""); | |
+ | |
+ switch (event) { | |
+ case NETDEV_UP: | |
+ iface_stat_create(dev, NULL); | |
+ atomic64_inc(&qtu_events.iface_events); | |
+ break; | |
+ case NETDEV_DOWN: | |
+ case NETDEV_UNREGISTER: | |
+ iface_stat_update(dev, event == NETDEV_DOWN); | |
+ atomic64_inc(&qtu_events.iface_events); | |
+ break; | |
+ } | |
+ return NOTIFY_DONE; | |
+} | |
+ | |
+static int iface_inet6addr_event_handler(struct notifier_block *nb, | |
+ unsigned long event, void *ptr) | |
+{ | |
+ struct inet6_ifaddr *ifa = ptr; | |
+ struct net_device *dev; | |
+ | |
+ if (unlikely(module_passive)) | |
+ return NOTIFY_DONE; | |
+ | |
+ IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " | |
+ "ev=0x%lx/%s ifa=%p\n", | |
+ event, netdev_evt_str(event), ifa); | |
+ | |
+ switch (event) { | |
+ case NETDEV_UP: | |
+ BUG_ON(!ifa || !ifa->idev); | |
+ dev = (struct net_device *)ifa->idev->dev; | |
+ iface_stat_create_ipv6(dev, ifa); | |
+ atomic64_inc(&qtu_events.iface_events); | |
+ break; | |
+ case NETDEV_DOWN: | |
+ case NETDEV_UNREGISTER: | |
+ BUG_ON(!ifa || !ifa->idev); | |
+ dev = (struct net_device *)ifa->idev->dev; | |
+ iface_stat_update(dev, event == NETDEV_DOWN); | |
+ atomic64_inc(&qtu_events.iface_events); | |
+ break; | |
+ } | |
+ return NOTIFY_DONE; | |
+} | |
+ | |
+static int iface_inetaddr_event_handler(struct notifier_block *nb, | |
+ unsigned long event, void *ptr) | |
+{ | |
+ struct in_ifaddr *ifa = ptr; | |
+ struct net_device *dev; | |
+ | |
+ if (unlikely(module_passive)) | |
+ return NOTIFY_DONE; | |
+ | |
+ IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " | |
+ "ev=0x%lx/%s ifa=%p\n", | |
+ event, netdev_evt_str(event), ifa); | |
+ | |
+ switch (event) { | |
+ case NETDEV_UP: | |
+ BUG_ON(!ifa || !ifa->ifa_dev); | |
+ dev = ifa->ifa_dev->dev; | |
+ iface_stat_create(dev, ifa); | |
+ atomic64_inc(&qtu_events.iface_events); | |
+ break; | |
+ case NETDEV_DOWN: | |
+ case NETDEV_UNREGISTER: | |
+ BUG_ON(!ifa || !ifa->ifa_dev); | |
+ dev = ifa->ifa_dev->dev; | |
+ iface_stat_update(dev, event == NETDEV_DOWN); | |
+ atomic64_inc(&qtu_events.iface_events); | |
+ break; | |
+ } | |
+ return NOTIFY_DONE; | |
+} | |
+ | |
+static struct notifier_block iface_netdev_notifier_blk = { | |
+ .notifier_call = iface_netdev_event_handler, | |
+}; | |
+ | |
+static struct notifier_block iface_inetaddr_notifier_blk = { | |
+ .notifier_call = iface_inetaddr_event_handler, | |
+}; | |
+ | |
+static struct notifier_block iface_inet6addr_notifier_blk = { | |
+ .notifier_call = iface_inet6addr_event_handler, | |
+}; | |
+ | |
+static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) | |
+{ | |
+ int err; | |
+ | |
+ iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); | |
+ if (!iface_stat_procdir) { | |
+ pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); | |
+ err = -1; | |
+ goto err; | |
+ } | |
+ | |
+ iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, | |
+ proc_iface_perms, | |
+ parent_procdir); | |
+ if (!iface_stat_all_procfile) { | |
+ pr_err("qtaguid: iface_stat: init " | |
+ " failed to create stat_all proc entry\n"); | |
+ err = -1; | |
+ goto err_zap_entry; | |
+ } | |
+ iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; | |
+ | |
+ | |
+ err = register_netdevice_notifier(&iface_netdev_notifier_blk); | |
+ if (err) { | |
+ pr_err("qtaguid: iface_stat: init " | |
+ "failed to register dev event handler\n"); | |
+ goto err_zap_all_stats_entry; | |
+ } | |
+ err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); | |
+ if (err) { | |
+ pr_err("qtaguid: iface_stat: init " | |
+ "failed to register ipv4 dev event handler\n"); | |
+ goto err_unreg_nd; | |
+ } | |
+ | |
+ err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); | |
+ if (err) { | |
+ pr_err("qtaguid: iface_stat: init " | |
+ "failed to register ipv6 dev event handler\n"); | |
+ goto err_unreg_ip4_addr; | |
+ } | |
+ return 0; | |
+ | |
+err_unreg_ip4_addr: | |
+ unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); | |
+err_unreg_nd: | |
+ unregister_netdevice_notifier(&iface_netdev_notifier_blk); | |
+err_zap_all_stats_entry: | |
+ remove_proc_entry(iface_stat_all_procfilename, parent_procdir); | |
+err_zap_entry: | |
+ remove_proc_entry(iface_stat_procdirname, parent_procdir); | |
+err: | |
+ return err; | |
+} | |
+ | |
+static struct sock *qtaguid_find_sk(const struct sk_buff *skb, | |
+ struct xt_action_param *par) | |
+{ | |
+ struct sock *sk; | |
+ unsigned int hook_mask = (1 << par->hooknum); | |
+ | |
+ MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, | |
+ par->hooknum, par->family); | |
+ | |
+ /* | |
+	 * Let's not abuse the xt_socket_get*_sk(), or else it will | |
+ * return garbage SKs. | |
+ */ | |
+ if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) | |
+ return NULL; | |
+ | |
+ switch (par->family) { | |
+ case NFPROTO_IPV6: | |
+ sk = xt_socket_get6_sk(skb, par); | |
+ break; | |
+ case NFPROTO_IPV4: | |
+ sk = xt_socket_get4_sk(skb, par); | |
+ break; | |
+ default: | |
+ return NULL; | |
+ } | |
+ | |
+ /* | |
+	 * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs. | |
+ * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 | |
+ * Not fixed in 3.0-r3 :( | |
+ */ | |
+ if (sk) { | |
+ MT_DEBUG("qtaguid: %p->sk_proto=%u " | |
+ "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); | |
+ if (sk->sk_state == TCP_TIME_WAIT) { | |
+ xt_socket_put_sk(sk); | |
+ sk = NULL; | |
+ } | |
+ } | |
+ return sk; | |
+} | |
+ | |
+static void account_for_uid(const struct sk_buff *skb, | |
+ const struct sock *alternate_sk, uid_t uid, | |
+ struct xt_action_param *par) | |
+{ | |
+ const struct net_device *el_dev; | |
+ | |
+ if (!skb->dev) { | |
+ MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); | |
+ el_dev = par->in ? : par->out; | |
+ } else { | |
+ const struct net_device *other_dev; | |
+ el_dev = skb->dev; | |
+ other_dev = par->in ? : par->out; | |
+ if (el_dev != other_dev) { | |
+ MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " | |
+ "par->(in/out)=%p %s\n", | |
+ par->hooknum, el_dev, el_dev->name, other_dev, | |
+ other_dev->name); | |
+ } | |
+ } | |
+ | |
+ if (unlikely(!el_dev)) { | |
+ pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); | |
+ } else if (unlikely(!el_dev->name)) { | |
+ pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); | |
+ } else { | |
+ MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", | |
+ par->hooknum, | |
+ el_dev->name, | |
+ el_dev->type); | |
+ | |
+ if_tag_stat_update(el_dev->name, uid, | |
+ skb->sk ? skb->sk : alternate_sk, | |
+ par->in ? IFS_RX : IFS_TX, | |
+ ip_hdr(skb)->protocol, skb->len); | |
+ } | |
+} | |
+ | |
+static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) | |
+{ | |
+ const struct xt_qtaguid_match_info *info = par->matchinfo; | |
+ const struct file *filp; | |
+ bool got_sock = false; | |
+ struct sock *sk; | |
+ uid_t sock_uid; | |
+ bool res; | |
+ | |
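+	/* In passive mode, skip inspection and return the default implied by the match/invert flags. */ | |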
+ if (unlikely(module_passive)) | |
+ return (info->match ^ info->invert) == 0; | |
+ | |
+ MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", | |
+ par->hooknum, skb, par->in, par->out, par->family); | |
+ | |
+ atomic64_inc(&qtu_events.match_calls); | |
+ if (skb == NULL) { | |
+ res = (info->match ^ info->invert) == 0; | |
+ goto ret_res; | |
+ } | |
+ | |
+ sk = skb->sk; | |
+ | |
+ if (sk == NULL) { | |
+ /* | |
+ * A missing sk->sk_socket happens when packets are in-flight | |
+ * and the matching socket is already closed and gone. | |
+ */ | |
+ sk = qtaguid_find_sk(skb, par); | |
+ /* | |
+ * If we got the socket from the find_sk(), we will need to put | |
+ * it back, as nf_tproxy_get_sock_v4() got it. | |
+ */ | |
+ got_sock = sk; | |
+ if (sk) | |
+ atomic64_inc(&qtu_events.match_found_sk_in_ct); | |
+ else | |
+ atomic64_inc(&qtu_events.match_found_no_sk_in_ct); | |
+ } else { | |
+ atomic64_inc(&qtu_events.match_found_sk); | |
+ } | |
+ MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", | |
+ par->hooknum, sk, got_sock, ip_hdr(skb)->protocol); | |
+ if (sk != NULL) { | |
+ MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", | |
+ par->hooknum, sk, sk->sk_socket, | |
+ sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); | |
+ filp = sk->sk_socket ? sk->sk_socket->file : NULL; | |
+ MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", | |
+ par->hooknum, filp ? filp->f_cred->fsuid : -1); | |
+ } | |
+ | |
+ if (sk == NULL || sk->sk_socket == NULL) { | |
+ /* | |
+ * Here, the qtaguid_find_sk() using connection tracking | |
+ * couldn't find the owner, so for now we just count them | |
+ * against the system. | |
+ */ | |
+ /* | |
+ * TODO: unhack how to force just accounting. | |
+ * For now we only do iface stats when the uid-owner is not | |
+ * requested. | |
+ */ | |
+ if (!(info->match & XT_QTAGUID_UID)) | |
+ account_for_uid(skb, sk, 0, par); | |
+ MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", | |
+ par->hooknum, | |
+ sk ? sk->sk_socket : NULL); | |
+ res = (info->match ^ info->invert) == 0; | |
+ atomic64_inc(&qtu_events.match_no_sk); | |
+ goto put_sock_ret_res; | |
+ } else if (info->match & info->invert & XT_QTAGUID_SOCKET) { | |
+ res = false; | |
+ goto put_sock_ret_res; | |
+ } | |
+ filp = sk->sk_socket->file; | |
+ if (filp == NULL) { | |
+ MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); | |
+ account_for_uid(skb, sk, 0, par); | |
+ res = ((info->match ^ info->invert) & | |
+ (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; | |
+ atomic64_inc(&qtu_events.match_no_sk_file); | |
+ goto put_sock_ret_res; | |
+ } | |
+ sock_uid = filp->f_cred->fsuid; | |
+ /* | |
+ * TODO: unhack how to force just accounting. | |
+ * For now we only do iface stats when the uid-owner is not requested | |
+ */ | |
+ if (!(info->match & XT_QTAGUID_UID)) | |
+ account_for_uid(skb, sk, sock_uid, par); | |
+ | |
+ /* | |
+ * The following two tests fail the match when: | |
+ * id not in range AND no inverted condition requested | |
+ * or id in range AND inverted condition requested | |
+ * Thus (!a && b) || (a && !b) == a ^ b | |
+ */ | |
+ if (info->match & XT_QTAGUID_UID) | |
+ if ((filp->f_cred->fsuid >= info->uid_min && | |
+ filp->f_cred->fsuid <= info->uid_max) ^ | |
+ !(info->invert & XT_QTAGUID_UID)) { | |
+ MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", | |
+ par->hooknum); | |
+ res = false; | |
+ goto put_sock_ret_res; | |
+ } | |
+ if (info->match & XT_QTAGUID_GID) | |
+ if ((filp->f_cred->fsgid >= info->gid_min && | |
+ filp->f_cred->fsgid <= info->gid_max) ^ | |
+ !(info->invert & XT_QTAGUID_GID)) { | |
+ MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", | |
+ par->hooknum); | |
+ res = false; | |
+ goto put_sock_ret_res; | |
+ } | |
+ | |
+ MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); | |
+ res = true; | |
+ | |
+put_sock_ret_res: | |
+ if (got_sock) | |
+ xt_socket_put_sk(sk); | |
+ret_res: | |
+ MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); | |
+ return res; | |
+} | |
+ | |
+#ifdef DDEBUG | |
+/* This function is not in xt_qtaguid_print.c because of locks visibility */ | |
+static void prdebug_full_state(int indent_level, const char *fmt, ...) | |
+{ | |
+ va_list args; | |
+ char *fmt_buff; | |
+ char *buff; | |
+ | |
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
+ return; | |
+ | |
+ fmt_buff = kasprintf(GFP_ATOMIC, | |
+ "qtaguid: %s(): %s {\n", __func__, fmt); | |
+ BUG_ON(!fmt_buff); | |
+ va_start(args, fmt); | |
+ buff = kvasprintf(GFP_ATOMIC, | |
+ fmt_buff, args); | |
+ BUG_ON(!buff); | |
+ pr_debug("%s", buff); | |
+ kfree(fmt_buff); | |
+ kfree(buff); | |
+ va_end(args); | |
+ | |
+ spin_lock_bh(&sock_tag_list_lock); | |
+ prdebug_sock_tag_tree(indent_level, &sock_tag_tree); | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ | |
+ spin_lock_bh(&sock_tag_list_lock); | |
+ spin_lock_bh(&uid_tag_data_tree_lock); | |
+ prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); | |
+ prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); | |
+ spin_unlock_bh(&uid_tag_data_tree_lock); | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ | |
+ spin_lock_bh(&iface_stat_list_lock); | |
+ prdebug_iface_stat_list(indent_level, &iface_stat_list); | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ | |
+ pr_debug("qtaguid: %s(): }\n", __func__); | |
+} | |
+#else | |
+static void prdebug_full_state(int indent_level, const char *fmt, ...) {} | |
+#endif | |
+ | |
+/* | |
+ * Procfs reader to get all active socket tags using style "1)" as described in | |
+ * fs/proc/generic.c | |
+ */ | |
+static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, | |
+ off_t items_to_skip, int char_count, int *eof, | |
+ void *data) | |
+{ | |
+ char *outp = page; | |
+ int len; | |
+ uid_t uid; | |
+ struct rb_node *node; | |
+ struct sock_tag *sock_tag_entry; | |
+ int item_index = 0; | |
+ int indent_level = 0; | |
+ long f_count; | |
+ | |
+ if (unlikely(module_passive)) { | |
+ *eof = 1; | |
+ return 0; | |
+ } | |
+ | |
+ if (*eof) | |
+ return 0; | |
+ | |
+ CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", | |
+ page, items_to_skip, char_count, *eof); | |
+ | |
+ spin_lock_bh(&sock_tag_list_lock); | |
+ for (node = rb_first(&sock_tag_tree); | |
+ node; | |
+ node = rb_next(node)) { | |
+ if (item_index++ < items_to_skip) | |
+ continue; | |
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); | |
+ uid = get_uid_from_tag(sock_tag_entry->tag); | |
+ CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " | |
+ "pid=%u\n", | |
+ sock_tag_entry->sk, | |
+ sock_tag_entry->tag, | |
+ uid, | |
+ sock_tag_entry->pid | |
+ ); | |
+ f_count = atomic_long_read( | |
+ &sock_tag_entry->socket->file->f_count); | |
+ len = snprintf(outp, char_count, | |
+ "sock=%p tag=0x%llx (uid=%u) pid=%u " | |
+ "f_count=%lu\n", | |
+ sock_tag_entry->sk, | |
+ sock_tag_entry->tag, uid, | |
+ sock_tag_entry->pid, f_count); | |
+ if (len >= char_count) { | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ *outp = '\0'; | |
+ return outp - page; | |
+ } | |
+ outp += len; | |
+ char_count -= len; | |
+ (*num_items_returned)++; | |
+ } | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ | |
+ if (item_index++ >= items_to_skip) { | |
+ len = snprintf(outp, char_count, | |
+ "events: sockets_tagged=%llu " | |
+ "sockets_untagged=%llu " | |
+ "counter_set_changes=%llu " | |
+ "delete_cmds=%llu " | |
+ "iface_events=%llu " | |
+ "match_calls=%llu " | |
+ "match_found_sk=%llu " | |
+ "match_found_sk_in_ct=%llu " | |
+ "match_found_no_sk_in_ct=%llu " | |
+ "match_no_sk=%llu " | |
+ "match_no_sk_file=%llu\n", | |
+ atomic64_read(&qtu_events.sockets_tagged), | |
+ atomic64_read(&qtu_events.sockets_untagged), | |
+ atomic64_read(&qtu_events.counter_set_changes), | |
+ atomic64_read(&qtu_events.delete_cmds), | |
+ atomic64_read(&qtu_events.iface_events), | |
+ atomic64_read(&qtu_events.match_calls), | |
+ atomic64_read(&qtu_events.match_found_sk), | |
+ atomic64_read(&qtu_events.match_found_sk_in_ct), | |
+ atomic64_read( | |
+ &qtu_events.match_found_no_sk_in_ct), | |
+ atomic64_read(&qtu_events.match_no_sk), | |
+ atomic64_read(&qtu_events.match_no_sk_file)); | |
+ if (len >= char_count) { | |
+ *outp = '\0'; | |
+ return outp - page; | |
+ } | |
+ outp += len; | |
+ char_count -= len; | |
+ (*num_items_returned)++; | |
+ } | |
+ | |
+ /* Count the following as part of the last item_index */ | |
+ if (item_index > items_to_skip) { | |
+ prdebug_full_state(indent_level, "proc ctrl"); | |
+ } | |
+ | |
+ *eof = 1; | |
+ return outp - page; | |
+} | |
+ | |
+/* | |
+ * Delete socket tags and stat tags associated with a given | |
+ * accounting tag and uid. | |
+ */ | |
+static int ctrl_cmd_delete(const char *input) | |
+{ | |
+ char cmd; | |
+ uid_t uid; | |
+ uid_t entry_uid; | |
+ tag_t acct_tag; | |
+ tag_t tag; | |
+ int res, argc; | |
+ struct iface_stat *iface_entry; | |
+ struct rb_node *node; | |
+ struct sock_tag *st_entry; | |
+ struct rb_root st_to_free_tree = RB_ROOT; | |
+ struct tag_stat *ts_entry; | |
+ struct tag_counter_set *tcs_entry; | |
+ struct tag_ref *tr_entry; | |
+ struct uid_tag_data *utd_entry; | |
+ | |
+ argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); | |
+ CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " | |
+ "user_tag=0x%llx uid=%u\n", input, argc, cmd, | |
+ acct_tag, uid); | |
+ if (argc < 2) { | |
+ res = -EINVAL; | |
+ goto err; | |
+ } | |
+ if (!valid_atag(acct_tag)) { | |
+ pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); | |
+ res = -EINVAL; | |
+ goto err; | |
+ } | |
+ if (argc < 3) { | |
+ uid = current_fsuid(); | |
+ } else if (!can_impersonate_uid(uid)) { | |
+ pr_info("qtaguid: ctrl_delete(%s): " | |
+ "insufficient priv from pid=%u tgid=%u uid=%u\n", | |
+ input, current->pid, current->tgid, current_fsuid()); | |
+ res = -EPERM; | |
+ goto err; | |
+ } | |
+ | |
+ tag = combine_atag_with_uid(acct_tag, uid); | |
+ CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
+ "looking for tag=0x%llx (uid=%u)\n", | |
+ input, tag, uid); | |
+ | |
+ /* Delete socket tags */ | |
+ spin_lock_bh(&sock_tag_list_lock); | |
+ node = rb_first(&sock_tag_tree); | |
+ while (node) { | |
+ st_entry = rb_entry(node, struct sock_tag, sock_node); | |
+ entry_uid = get_uid_from_tag(st_entry->tag); | |
+ node = rb_next(node); | |
+ if (entry_uid != uid) | |
+ continue; | |
+ | |
+ CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", | |
+ input, st_entry->tag, entry_uid); | |
+ | |
+ if (!acct_tag || st_entry->tag == tag) { | |
+ rb_erase(&st_entry->sock_node, &sock_tag_tree); | |
+ /* Can't sockfd_put() within spinlock, do it later. */ | |
+ sock_tag_tree_insert(st_entry, &st_to_free_tree); | |
+ tr_entry = lookup_tag_ref(st_entry->tag, NULL); | |
+ BUG_ON(tr_entry->num_sock_tags <= 0); | |
+ tr_entry->num_sock_tags--; | |
+ /* | |
+ * TODO: remove if, and start failing. | |
+			 * This is a hack to work around the fact that in some | |
+			 * places we have "if (IS_ERR_OR_NULL(pqd_entry))" | |
+			 * to tolerate apps that didn't open /dev/xt_qtaguid. | |
+ */ | |
+ if (st_entry->list.next && st_entry->list.prev) | |
+ list_del(&st_entry->list); | |
+ } | |
+ } | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ | |
+ sock_tag_tree_erase(&st_to_free_tree); | |
+ | |
+ /* Delete tag counter-sets */ | |
+ spin_lock_bh(&tag_counter_set_list_lock); | |
+ /* Counter sets are only on the uid tag, not full tag */ | |
+ tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | |
+ if (tcs_entry) { | |
+ CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
+ "erase tcs: tag=0x%llx (uid=%u) set=%d\n", | |
+ input, | |
+ tcs_entry->tn.tag, | |
+ get_uid_from_tag(tcs_entry->tn.tag), | |
+ tcs_entry->active_set); | |
+ rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); | |
+ kfree(tcs_entry); | |
+ } | |
+ spin_unlock_bh(&tag_counter_set_list_lock); | |
+ | |
+ /* | |
+ * If acct_tag is 0, then all entries belonging to uid are | |
+ * erased. | |
+ */ | |
+ spin_lock_bh(&iface_stat_list_lock); | |
+ list_for_each_entry(iface_entry, &iface_stat_list, list) { | |
+ spin_lock_bh(&iface_entry->tag_stat_list_lock); | |
+ node = rb_first(&iface_entry->tag_stat_tree); | |
+ while (node) { | |
+ ts_entry = rb_entry(node, struct tag_stat, tn.node); | |
+ entry_uid = get_uid_from_tag(ts_entry->tn.tag); | |
+ node = rb_next(node); | |
+ | |
+ CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
+ "ts tag=0x%llx (uid=%u)\n", | |
+ input, ts_entry->tn.tag, entry_uid); | |
+ | |
+ if (entry_uid != uid) | |
+ continue; | |
+ if (!acct_tag || ts_entry->tn.tag == tag) { | |
+ CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
+ "erase ts: %s 0x%llx %u\n", | |
+ input, iface_entry->ifname, | |
+ get_atag_from_tag(ts_entry->tn.tag), | |
+ entry_uid); | |
+ rb_erase(&ts_entry->tn.node, | |
+ &iface_entry->tag_stat_tree); | |
+ kfree(ts_entry); | |
+ } | |
+ } | |
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock); | |
+ } | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ | |
+ /* Cleanup the uid_tag_data */ | |
+ spin_lock_bh(&uid_tag_data_tree_lock); | |
+ node = rb_first(&uid_tag_data_tree); | |
+ while (node) { | |
+ utd_entry = rb_entry(node, struct uid_tag_data, node); | |
+ entry_uid = utd_entry->uid; | |
+ node = rb_next(node); | |
+ | |
+ CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
+ "utd uid=%u\n", | |
+ input, entry_uid); | |
+ | |
+ if (entry_uid != uid) | |
+ continue; | |
+ /* | |
+		 * Go over the tag_refs and free those that no longer | |
+		 * have sock_tags using them. | |
+ */ | |
+ put_tag_ref_tree(tag, utd_entry); | |
+ put_utd_entry(utd_entry); | |
+ } | |
+ spin_unlock_bh(&uid_tag_data_tree_lock); | |
+ | |
+ atomic64_inc(&qtu_events.delete_cmds); | |
+ res = 0; | |
+ | |
+err: | |
+ return res; | |
+} | |
+ | |
+static int ctrl_cmd_counter_set(const char *input) | |
+{ | |
+ char cmd; | |
+ uid_t uid = 0; | |
+ tag_t tag; | |
+ int res, argc; | |
+ struct tag_counter_set *tcs; | |
+ int counter_set; | |
+ | |
+ argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); | |
+ CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " | |
+ "set=%d uid=%u\n", input, argc, cmd, | |
+ counter_set, uid); | |
+ if (argc != 3) { | |
+ res = -EINVAL; | |
+ goto err; | |
+ } | |
+ if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { | |
+ pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", | |
+ input); | |
+ res = -EINVAL; | |
+ goto err; | |
+ } | |
+ if (!can_manipulate_uids()) { | |
+ pr_info("qtaguid: ctrl_counterset(%s): " | |
+ "insufficient priv from pid=%u tgid=%u uid=%u\n", | |
+ input, current->pid, current->tgid, current_fsuid()); | |
+ res = -EPERM; | |
+ goto err; | |
+ } | |
+ | |
+ tag = make_tag_from_uid(uid); | |
+ spin_lock_bh(&tag_counter_set_list_lock); | |
+ tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | |
+ if (!tcs) { | |
+ tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); | |
+ if (!tcs) { | |
+ spin_unlock_bh(&tag_counter_set_list_lock); | |
+ pr_err("qtaguid: ctrl_counterset(%s): " | |
+ "failed to alloc counter set\n", | |
+ input); | |
+ res = -ENOMEM; | |
+ goto err; | |
+ } | |
+ tcs->tn.tag = tag; | |
+ tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); | |
+ CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " | |
+ "(uid=%u) set=%d\n", | |
+ input, tag, get_uid_from_tag(tag), counter_set); | |
+ } | |
+ tcs->active_set = counter_set; | |
+ spin_unlock_bh(&tag_counter_set_list_lock); | |
+ atomic64_inc(&qtu_events.counter_set_changes); | |
+ res = 0; | |
+ | |
+err: | |
+ return res; | |
+} | |
+ | |
+static int ctrl_cmd_tag(const char *input) | |
+{ | |
+ char cmd; | |
+ int sock_fd = 0; | |
+ uid_t uid = 0; | |
+ tag_t acct_tag = make_atag_from_value(0); | |
+ tag_t full_tag; | |
+ struct socket *el_socket; | |
+ int res, argc; | |
+ struct sock_tag *sock_tag_entry; | |
+ struct tag_ref *tag_ref_entry; | |
+ struct uid_tag_data *uid_tag_data_entry; | |
+ struct proc_qtu_data *pqd_entry; | |
+ | |
+ /* Unassigned args will get defaulted later. */ | |
+ argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); | |
+ CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " | |
+ "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, | |
+ acct_tag, uid); | |
+ if (argc < 2) { | |
+ res = -EINVAL; | |
+ goto err; | |
+ } | |
+ el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ | |
+ if (!el_socket) { | |
+ pr_info("qtaguid: ctrl_tag(%s): failed to lookup" | |
+ " sock_fd=%d err=%d\n", input, sock_fd, res); | |
+ goto err; | |
+ } | |
+ CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", | |
+ input, atomic_long_read(&el_socket->file->f_count), | |
+ el_socket->sk); | |
+ if (argc < 3) { | |
+ acct_tag = make_atag_from_value(0); | |
+ } else if (!valid_atag(acct_tag)) { | |
+ pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); | |
+ res = -EINVAL; | |
+ goto err_put; | |
+ } | |
+ CT_DEBUG("qtaguid: ctrl_tag(%s): " | |
+ "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " | |
+ "in_group=%d in_egroup=%d\n", | |
+ input, current->pid, current->tgid, current_uid(), | |
+ current_euid(), current_fsuid(), | |
+ in_group_p(proc_ctrl_write_gid), | |
+ in_egroup_p(proc_ctrl_write_gid)); | |
+ if (argc < 4) { | |
+ uid = current_fsuid(); | |
+ } else if (!can_impersonate_uid(uid)) { | |
+ pr_info("qtaguid: ctrl_tag(%s): " | |
+ "insufficient priv from pid=%u tgid=%u uid=%u\n", | |
+ input, current->pid, current->tgid, current_fsuid()); | |
+ res = -EPERM; | |
+ goto err_put; | |
+ } | |
+ full_tag = combine_atag_with_uid(acct_tag, uid); | |
+ | |
+ spin_lock_bh(&sock_tag_list_lock); | |
+ sock_tag_entry = get_sock_stat_nl(el_socket->sk); | |
+ tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); | |
+ if (IS_ERR(tag_ref_entry)) { | |
+ res = PTR_ERR(tag_ref_entry); | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ goto err_put; | |
+ } | |
+ tag_ref_entry->num_sock_tags++; | |
+ if (sock_tag_entry) { | |
+ struct tag_ref *prev_tag_ref_entry; | |
+ | |
+ CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " | |
+ "st@%p ...->f_count=%ld\n", | |
+ input, el_socket->sk, sock_tag_entry, | |
+ atomic_long_read(&el_socket->file->f_count)); | |
+ /* | |
+ * This is a re-tagging, so release the sock_fd that was | |
+ * locked at the time of the 1st tagging. | |
+ * There is still the ref from this call's sockfd_lookup() so | |
+ * it can be done within the spinlock. | |
+ */ | |
+ sockfd_put(sock_tag_entry->socket); | |
+ prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, | |
+ &uid_tag_data_entry); | |
+ BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); | |
+ BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); | |
+ prev_tag_ref_entry->num_sock_tags--; | |
+ sock_tag_entry->tag = full_tag; | |
+ } else { | |
+ CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", | |
+ input, el_socket->sk); | |
+ sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), | |
+ GFP_ATOMIC); | |
+ if (!sock_tag_entry) { | |
+ pr_err("qtaguid: ctrl_tag(%s): " | |
+ "socket tag alloc failed\n", | |
+ input); | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ res = -ENOMEM; | |
+ goto err_tag_unref_put; | |
+ } | |
+ sock_tag_entry->sk = el_socket->sk; | |
+ sock_tag_entry->socket = el_socket; | |
+ sock_tag_entry->pid = current->tgid; | |
+ sock_tag_entry->tag = combine_atag_with_uid(acct_tag, | |
+ uid); | |
+ spin_lock_bh(&uid_tag_data_tree_lock); | |
+ pqd_entry = proc_qtu_data_tree_search( | |
+ &proc_qtu_data_tree, current->tgid); | |
+ /* | |
+ * TODO: remove if, and start failing. | |
+ * At first, we want to catch user-space code that is not | |
+ * opening the /dev/xt_qtaguid. | |
+ */ | |
+ if (IS_ERR_OR_NULL(pqd_entry)) | |
+ pr_warn_once( | |
+ "qtaguid: %s(): " | |
+ "User space forgot to open /dev/xt_qtaguid? " | |
+ "pid=%u tgid=%u uid=%u\n", __func__, | |
+ current->pid, current->tgid, | |
+ current_fsuid()); | |
+ else | |
+ list_add(&sock_tag_entry->list, | |
+ &pqd_entry->sock_tag_list); | |
+ spin_unlock_bh(&uid_tag_data_tree_lock); | |
+ | |
+ sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); | |
+ atomic64_inc(&qtu_events.sockets_tagged); | |
+ } | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ /* We keep the ref to the socket (file) until it is untagged */ | |
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", | |
+ input, sock_tag_entry, | |
+ atomic_long_read(&el_socket->file->f_count)); | |
+ return 0; | |
+ | |
+err_tag_unref_put: | |
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0); | |
+ tag_ref_entry->num_sock_tags--; | |
+ free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); | |
+err_put: | |
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", | |
+ input, atomic_long_read(&el_socket->file->f_count) - 1); | |
+ /* Release the sock_fd that was grabbed by sockfd_lookup(). */ | |
+ sockfd_put(el_socket); | |
+ return res; | |
+ | |
+err: | |
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); | |
+ return res; | |
+} | |
+ | |
+static int ctrl_cmd_untag(const char *input) | |
+{ | |
+ char cmd; | |
+ int sock_fd = 0; | |
+ struct socket *el_socket; | |
+ int res, argc; | |
+ struct sock_tag *sock_tag_entry; | |
+ struct tag_ref *tag_ref_entry; | |
+ struct uid_tag_data *utd_entry; | |
+ struct proc_qtu_data *pqd_entry; | |
+ | |
+ argc = sscanf(input, "%c %d", &cmd, &sock_fd); | |
+ CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", | |
+ input, argc, cmd, sock_fd); | |
+ if (argc < 2) { | |
+ res = -EINVAL; | |
+ goto err; | |
+ } | |
+ el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ | |
+ if (!el_socket) { | |
+ pr_info("qtaguid: ctrl_untag(%s): failed to lookup" | |
+ " sock_fd=%d err=%d\n", input, sock_fd, res); | |
+ goto err; | |
+ } | |
+ CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", | |
+ input, atomic_long_read(&el_socket->file->f_count), | |
+ el_socket->sk); | |
+ spin_lock_bh(&sock_tag_list_lock); | |
+ sock_tag_entry = get_sock_stat_nl(el_socket->sk); | |
+ if (!sock_tag_entry) { | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ res = -EINVAL; | |
+ goto err_put; | |
+ } | |
+ /* | |
+ * The socket already belongs to the current process | |
+ * so it can do whatever it wants to it. | |
+ */ | |
+ rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); | |
+ | |
+ tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); | |
+ BUG_ON(!tag_ref_entry); | |
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0); | |
+ spin_lock_bh(&uid_tag_data_tree_lock); | |
+ pqd_entry = proc_qtu_data_tree_search( | |
+ &proc_qtu_data_tree, current->tgid); | |
+ /* | |
+ * TODO: remove if, and start failing. | |
+ * At first, we want to catch user-space code that is not | |
+ * opening the /dev/xt_qtaguid. | |
+ */ | |
+ if (IS_ERR_OR_NULL(pqd_entry)) | |
+ pr_warn_once("qtaguid: %s(): " | |
+ "User space forgot to open /dev/xt_qtaguid? " | |
+ "pid=%u tgid=%u uid=%u\n", __func__, | |
+ current->pid, current->tgid, current_fsuid()); | |
+ else | |
+ list_del(&sock_tag_entry->list); | |
+ spin_unlock_bh(&uid_tag_data_tree_lock); | |
+ /* | |
+ * We don't free tag_ref from the utd_entry here, | |
+ * only during a cmd_delete(). | |
+ */ | |
+ tag_ref_entry->num_sock_tags--; | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ /* | |
+ * Release the sock_fd that was grabbed at tag time, | |
+ * and once more for the sockfd_lookup() here. | |
+ */ | |
+ sockfd_put(sock_tag_entry->socket); | |
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", | |
+ input, sock_tag_entry, | |
+ atomic_long_read(&el_socket->file->f_count) - 1); | |
+ sockfd_put(el_socket); | |
+ | |
+ kfree(sock_tag_entry); | |
+ atomic64_inc(&qtu_events.sockets_untagged); | |
+ | |
+ return 0; | |
+ | |
+err_put: | |
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n", | |
+ input, atomic_long_read(&el_socket->file->f_count) - 1); | |
+ /* Release the sock_fd that was grabbed by sockfd_lookup(). */ | |
+ sockfd_put(el_socket); | |
+ return res; | |
+ | |
+err: | |
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); | |
+ return res; | |
+} | |
+ | |
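+/* | |
+ * Command formats accepted on the ctrl file (see the sscanf in each handler): | |
+ *   d <acct_tag> [<uid>]              delete tags and stats | |
+ *   s <counter_set> <uid>             switch the active counter set | |
+ *   t <sock_fd> [<acct_tag> [<uid>]]  tag a socket | |
+ *   u <sock_fd>                       untag a socket | |
+ */ | |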
+static int qtaguid_ctrl_parse(const char *input, int count) | |
+{ | |
+ char cmd; | |
+ int res; | |
+ | |
+ cmd = input[0]; | |
+ /* Collect params for commands */ | |
+ switch (cmd) { | |
+ case 'd': | |
+ res = ctrl_cmd_delete(input); | |
+ break; | |
+ | |
+ case 's': | |
+ res = ctrl_cmd_counter_set(input); | |
+ break; | |
+ | |
+ case 't': | |
+ res = ctrl_cmd_tag(input); | |
+ break; | |
+ | |
+ case 'u': | |
+ res = ctrl_cmd_untag(input); | |
+ break; | |
+ | |
+ default: | |
+ res = -EINVAL; | |
+ goto err; | |
+ } | |
+ if (!res) | |
+ res = count; | |
+err: | |
+ CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); | |
+ return res; | |
+} | |
+ | |
+#define MAX_QTAGUID_CTRL_INPUT_LEN 255 | |
+static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, | |
+ unsigned long count, void *data) | |
+{ | |
+ char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; | |
+ | |
+ if (unlikely(module_passive)) | |
+ return count; | |
+ | |
+ if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) | |
+ return -EINVAL; | |
+ | |
+ if (copy_from_user(input_buf, buffer, count)) | |
+ return -EFAULT; | |
+ | |
+ input_buf[count] = '\0'; | |
+ return qtaguid_ctrl_parse(input_buf, count); | |
+} | |
+ | |
+struct proc_print_info { | |
+ char *outp; | |
+ char **num_items_returned; | |
+ struct iface_stat *iface_entry; | |
+ struct tag_stat *ts_entry; | |
+ int item_index; | |
+ int items_to_skip; | |
+ int char_count; | |
+}; | |
+ | |
+static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) | |
+{ | |
+ int len; | |
+ struct data_counters *cnts; | |
+ | |
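+	/* item_index 0 prints the column header line. */ | |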
+ if (!ppi->item_index) { | |
+ if (ppi->item_index++ < ppi->items_to_skip) | |
+ return 0; | |
+ len = snprintf(ppi->outp, ppi->char_count, | |
+ "idx iface acct_tag_hex uid_tag_int cnt_set " | |
+ "rx_bytes rx_packets " | |
+ "tx_bytes tx_packets " | |
+ "rx_tcp_bytes rx_tcp_packets " | |
+ "rx_udp_bytes rx_udp_packets " | |
+ "rx_other_bytes rx_other_packets " | |
+ "tx_tcp_bytes tx_tcp_packets " | |
+ "tx_udp_bytes tx_udp_packets " | |
+ "tx_other_bytes tx_other_packets\n"); | |
+ } else { | |
+ tag_t tag = ppi->ts_entry->tn.tag; | |
+ uid_t stat_uid = get_uid_from_tag(tag); | |
+ | |
+ if (!can_read_other_uid_stats(stat_uid)) { | |
+ CT_DEBUG("qtaguid: stats line: " | |
+ "%s 0x%llx %u: insufficient priv " | |
+ "from pid=%u tgid=%u uid=%u\n", | |
+ ppi->iface_entry->ifname, | |
+ get_atag_from_tag(tag), stat_uid, | |
+ current->pid, current->tgid, current_fsuid()); | |
+ return 0; | |
+ } | |
+ if (ppi->item_index++ < ppi->items_to_skip) | |
+ return 0; | |
+ cnts = &ppi->ts_entry->counters; | |
+ len = snprintf( | |
+ ppi->outp, ppi->char_count, | |
+ "%d %s 0x%llx %u %u " | |
+ "%llu %llu " | |
+ "%llu %llu " | |
+ "%llu %llu " | |
+ "%llu %llu " | |
+ "%llu %llu " | |
+ "%llu %llu " | |
+ "%llu %llu " | |
+ "%llu %llu\n", | |
+ ppi->item_index, | |
+ ppi->iface_entry->ifname, | |
+ get_atag_from_tag(tag), | |
+ stat_uid, | |
+ cnt_set, | |
+ dc_sum_bytes(cnts, cnt_set, IFS_RX), | |
+ dc_sum_packets(cnts, cnt_set, IFS_RX), | |
+ dc_sum_bytes(cnts, cnt_set, IFS_TX), | |
+ dc_sum_packets(cnts, cnt_set, IFS_TX), | |
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, | |
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, | |
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, | |
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, | |
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, | |
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, | |
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, | |
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, | |
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, | |
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, | |
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, | |
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); | |
+ } | |
+ return len; | |
+} | |
+ | |
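+/* Emit one stats line per counter set for the current tag_stat entry. */ | |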
+static bool pp_sets(struct proc_print_info *ppi) | |
+{ | |
+ int len; | |
+ int counter_set; | |
+ for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; | |
+ counter_set++) { | |
+ len = pp_stats_line(ppi, counter_set); | |
+ if (len >= ppi->char_count) { | |
+ *ppi->outp = '\0'; | |
+ return false; | |
+ } | |
+ if (len) { | |
+ ppi->outp += len; | |
+ ppi->char_count -= len; | |
+ (*ppi->num_items_returned)++; | |
+ } | |
+ } | |
+ return true; | |
+} | |
+ | |
+/* | |
+ * Procfs reader to get all tag stats using style "1)" as described in | |
+ * fs/proc/generic.c | |
+ * Groups all protocols tx/rx bytes. | |
+ */ | |
+static int qtaguid_stats_proc_read(char *page, char **num_items_returned, | |
+ off_t items_to_skip, int char_count, int *eof, | |
+ void *data) | |
+{ | |
+ struct proc_print_info ppi; | |
+ int len; | |
+ | |
+ ppi.outp = page; | |
+ ppi.item_index = 0; | |
+ ppi.char_count = char_count; | |
+ ppi.num_items_returned = num_items_returned; | |
+ ppi.items_to_skip = items_to_skip; | |
+ | |
+ if (unlikely(module_passive)) { | |
+ len = pp_stats_line(&ppi, 0); | |
+ /* The header should always be shorter than the buffer. */ | |
+ BUG_ON(len >= ppi.char_count); | |
+ (*num_items_returned)++; | |
+ *eof = 1; | |
+ return len; | |
+ } | |
+ | |
+ CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " | |
+ "char_count=%d *eof=%d\n", page, *num_items_returned, | |
+ items_to_skip, char_count, *eof); | |
+ | |
+ if (*eof) | |
+ return 0; | |
+ | |
+ /* The idx is there to help debug when things go belly up. */ | |
+ len = pp_stats_line(&ppi, 0); | |
+ /* Don't advance the outp unless the whole line was printed */ | |
+ if (len >= ppi.char_count) { | |
+ *ppi.outp = '\0'; | |
+ return ppi.outp - page; | |
+ } | |
+ if (len) { | |
+ ppi.outp += len; | |
+ ppi.char_count -= len; | |
+ (*num_items_returned)++; | |
+ } | |
+ | |
+ spin_lock_bh(&iface_stat_list_lock); | |
+ list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { | |
+ struct rb_node *node; | |
+ spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); | |
+ for (node = rb_first(&ppi.iface_entry->tag_stat_tree); | |
+ node; | |
+ node = rb_next(node)) { | |
+ ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); | |
+ if (!pp_sets(&ppi)) { | |
+ spin_unlock_bh( | |
+ &ppi.iface_entry->tag_stat_list_lock); | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ return ppi.outp - page; | |
+ } | |
+ } | |
+ spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); | |
+ } | |
+ spin_unlock_bh(&iface_stat_list_lock); | |
+ | |
+ *eof = 1; | |
+ return ppi.outp - page; | |
+} | |
+ | |
+/*------------------------------------------*/ | |
+static int qtudev_open(struct inode *inode, struct file *file) | |
+{ | |
+ struct uid_tag_data *utd_entry; | |
+ struct proc_qtu_data *pqd_entry; | |
+ struct proc_qtu_data *new_pqd_entry; | |
+ int res; | |
+ bool utd_entry_found; | |
+ | |
+ if (unlikely(qtu_proc_handling_passive)) | |
+ return 0; | |
+ | |
+ DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", | |
+ current->pid, current->tgid, current_fsuid()); | |
+ | |
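+	/* One proc_qtu_data is kept per tgid, linked to the per-uid uid_tag_data. */ | |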
+ spin_lock_bh(&uid_tag_data_tree_lock); | |
+ | |
+ /* Look for existing uid data, or alloc one. */ | |
+ utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); | |
+ if (IS_ERR_OR_NULL(utd_entry)) { | |
+ res = PTR_ERR(utd_entry); | |
+ goto err; | |
+ } | |
+ | |
+ /* Look for existing PID based proc_data */ | |
+ pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, | |
+ current->tgid); | |
+ if (pqd_entry) { | |
+ pr_err("qtaguid: qtudev_open(): %u/%u %u " | |
+ "%s already opened\n", | |
+ current->pid, current->tgid, current_fsuid(), | |
+ QTU_DEV_NAME); | |
+ res = -EBUSY; | |
+ goto err_unlock_free_utd; | |
+ } | |
+ | |
+ new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); | |
+ if (!new_pqd_entry) { | |
+ pr_err("qtaguid: qtudev_open(): %u/%u %u: " | |
+ "proc data alloc failed\n", | |
+ current->pid, current->tgid, current_fsuid()); | |
+ res = -ENOMEM; | |
+ goto err_unlock_free_utd; | |
+ } | |
+ new_pqd_entry->pid = current->tgid; | |
+ INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); | |
+ new_pqd_entry->parent_tag_data = utd_entry; | |
+ utd_entry->num_pqd++; | |
+ | |
+ proc_qtu_data_tree_insert(new_pqd_entry, | |
+ &proc_qtu_data_tree); | |
+ | |
+ spin_unlock_bh(&uid_tag_data_tree_lock); | |
+ DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", | |
+ current_fsuid(), new_pqd_entry); | |
+ file->private_data = new_pqd_entry; | |
+ return 0; | |
+ | |
+err_unlock_free_utd: | |
+ if (!utd_entry_found) { | |
+ rb_erase(&utd_entry->node, &uid_tag_data_tree); | |
+ kfree(utd_entry); | |
+ } | |
+ spin_unlock_bh(&uid_tag_data_tree_lock); | |
+err: | |
+ return res; | |
+} | |
+ | |
+static int qtudev_release(struct inode *inode, struct file *file) | |
+{ | |
+ struct proc_qtu_data *pqd_entry = file->private_data; | |
+ struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data; | |
+ struct sock_tag *st_entry; | |
+ struct rb_root st_to_free_tree = RB_ROOT; | |
+ struct list_head *entry, *next; | |
+ struct tag_ref *tr; | |
+ | |
+ if (unlikely(qtu_proc_handling_passive)) | |
+ return 0; | |
+ | |
+ /* | |
+ * Do not trust the current->pid, it might just be a kworker cleaning | |
+ * up after a dead proc. | |
+ */ | |
+ DR_DEBUG("qtaguid: qtudev_release(): " | |
+ "pid=%u tgid=%u uid=%u " | |
+ "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", | |
+ current->pid, current->tgid, pqd_entry->parent_tag_data->uid, | |
+ pqd_entry, pqd_entry->pid, utd_entry, | |
+ utd_entry->num_active_tags); | |
+ | |
+ spin_lock_bh(&sock_tag_list_lock); | |
+ spin_lock_bh(&uid_tag_data_tree_lock); | |
+ | |
+ list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { | |
+ st_entry = list_entry(entry, struct sock_tag, list); | |
+ DR_DEBUG("qtaguid: %s(): " | |
+ "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", | |
+ __func__, | |
+ st_entry, st_entry->sk, | |
+ current->pid, current->tgid, | |
+ pqd_entry->parent_tag_data->uid); | |
+ | |
+ utd_entry = uid_tag_data_tree_search( | |
+ &uid_tag_data_tree, | |
+ get_uid_from_tag(st_entry->tag)); | |
+ BUG_ON(IS_ERR_OR_NULL(utd_entry)); | |
+ DR_DEBUG("qtaguid: %s(): " | |
+ "looking for tag=0x%llx in utd_entry=%p\n", __func__, | |
+ st_entry->tag, utd_entry); | |
+ tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, | |
+ st_entry->tag); | |
+ BUG_ON(!tr); | |
+ BUG_ON(tr->num_sock_tags <= 0); | |
+ tr->num_sock_tags--; | |
+ free_tag_ref_from_utd_entry(tr, utd_entry); | |
+ | |
+ rb_erase(&st_entry->sock_node, &sock_tag_tree); | |
+ list_del(&st_entry->list); | |
+ /* Can't sockfd_put() within spinlock, do it later. */ | |
+ sock_tag_tree_insert(st_entry, &st_to_free_tree); | |
+ | |
+ /* | |
+ * Try to free the utd_entry if no other proc_qtu_data is | |
+ * using it (num_pqd is 0) and it doesn't have active tags | |
+ * (num_active_tags is 0). | |
+ */ | |
+ put_utd_entry(utd_entry); | |
+ } | |
+ | |
+ rb_erase(&pqd_entry->node, &proc_qtu_data_tree); | |
+ BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); | |
+ pqd_entry->parent_tag_data->num_pqd--; | |
+ put_utd_entry(pqd_entry->parent_tag_data); | |
+ kfree(pqd_entry); | |
+ file->private_data = NULL; | |
+ | |
+ spin_unlock_bh(&uid_tag_data_tree_lock); | |
+ spin_unlock_bh(&sock_tag_list_lock); | |
+ | |
+ | |
+ sock_tag_tree_erase(&st_to_free_tree); | |
+ | |
+ prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, | |
+ current->pid, current->tgid); | |
+ return 0; | |
+} | |
+ | |
+/*------------------------------------------*/ | |
+static const struct file_operations qtudev_fops = { | |
+ .owner = THIS_MODULE, | |
+ .open = qtudev_open, | |
+ .release = qtudev_release, | |
+}; | |
+ | |
+static struct miscdevice qtu_device = { | |
+ .minor = MISC_DYNAMIC_MINOR, | |
+ .name = QTU_DEV_NAME, | |
+ .fops = &qtudev_fops, | |
+ /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ | |
+}; | |
+ | |
+/*------------------------------------------*/ | |
+static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) | |
+{ | |
+ int ret; | |
+ *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); | |
+ if (!*res_procdir) { | |
+ pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); | |
+ ret = -ENOMEM; | |
+ goto no_dir; | |
+ } | |
+ | |
+ xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, | |
+ *res_procdir); | |
+ if (!xt_qtaguid_ctrl_file) { | |
+ pr_err("qtaguid: failed to create xt_qtaguid/ctrl " | |
+ " file\n"); | |
+ ret = -ENOMEM; | |
+ goto no_ctrl_entry; | |
+ } | |
+ xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; | |
+ xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; | |
+ | |
+ xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, | |
+ *res_procdir); | |
+ if (!xt_qtaguid_stats_file) { | |
+ pr_err("qtaguid: failed to create xt_qtaguid/stats " | |
+ "file\n"); | |
+ ret = -ENOMEM; | |
+ goto no_stats_entry; | |
+ } | |
+ xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; | |
+ /* | |
+ * TODO: add support counter hacking | |
+ * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; | |
+ */ | |
+ return 0; | |
+ | |
+no_stats_entry: | |
+ remove_proc_entry("ctrl", *res_procdir); | |
+no_ctrl_entry: | |
+ remove_proc_entry("xt_qtaguid", NULL); | |
+no_dir: | |
+ return ret; | |
+} | |
+ | |
+static struct xt_match qtaguid_mt_reg __read_mostly = { | |
+ /* | |
+ * This module masquerades as the "owner" module so that iptables | |
+ * tools can deal with it. | |
+ */ | |
+ .name = "owner", | |
+ .revision = 1, | |
+ .family = NFPROTO_UNSPEC, | |
+ .match = qtaguid_mt, | |
+ .matchsize = sizeof(struct xt_qtaguid_match_info), | |
+ .me = THIS_MODULE, | |
+}; | |
+ | |
+static int __init qtaguid_mt_init(void) | |
+{ | |
+ if (qtaguid_proc_register(&xt_qtaguid_procdir) | |
+ || iface_stat_init(xt_qtaguid_procdir) | |
+ || xt_register_match(&qtaguid_mt_reg) | |
+ || misc_register(&qtu_device)) | |
+ return -1; | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * TODO: allow unloading of the module. | |
+ * For now stats are permanent. | |
+ * Kconfig forces 'y/n' and never an 'm'. | |
+ */ | |
+ | |
+module_init(qtaguid_mt_init); | |
+MODULE_AUTHOR("jpa <[email protected]>"); | |
+MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); | |
+MODULE_LICENSE("GPL"); | |
+MODULE_ALIAS("ipt_owner"); | |
+MODULE_ALIAS("ip6t_owner"); | |
+MODULE_ALIAS("ipt_qtaguid"); | |
+MODULE_ALIAS("ip6t_qtaguid"); | |
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h | |
new file mode 100644 | |
index 0000000..02479d6 | |
--- /dev/null | |
+++ b/net/netfilter/xt_qtaguid_internal.h | |
@@ -0,0 +1,330 @@ | |
+/* | |
+ * Kernel iptables module to track stats for packets based on user tags. | |
+ * | |
+ * (C) 2011 Google, Inc | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#ifndef __XT_QTAGUID_INTERNAL_H__ | |
+#define __XT_QTAGUID_INTERNAL_H__ | |
+ | |
+#include <linux/types.h> | |
+#include <linux/rbtree.h> | |
+#include <linux/spinlock_types.h> | |
+#include <linux/workqueue.h> | |
+ | |
+/* Iface handling */ | |
+#define IDEBUG_MASK (1<<0) | |
+/* Iptable Matching. Per packet. */ | |
+#define MDEBUG_MASK (1<<1) | |
+/* Red-black tree handling. Per packet. */ | |
+#define RDEBUG_MASK (1<<2) | |
+/* procfs ctrl/stats handling */ | |
+#define CDEBUG_MASK (1<<3) | |
+/* dev and resource tracking */ | |
+#define DDEBUG_MASK (1<<4) | |
+ | |
+/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ | |
+#define DEFAULT_DEBUG_MASK 0 | |
+ | |
+/* | |
+ * (Un)Define these *DEBUG to compile out/in the pr_debug calls. | |
+ * All undef: text size ~ 0x3030; all def: ~ 0x4404. | |
+ */ | |
+#define IDEBUG | |
+#define MDEBUG | |
+#define RDEBUG | |
+#define CDEBUG | |
+#define DDEBUG | |
+ | |
+#define MSK_DEBUG(mask, ...) do { \ | |
+ if (unlikely(qtaguid_debug_mask & (mask))) \ | |
+ pr_debug(__VA_ARGS__); \ | |
+ } while (0) | |
+#ifdef IDEBUG | |
+#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) | |
+#else | |
+#define IF_DEBUG(...) no_printk(__VA_ARGS__) | |
+#endif | |
+#ifdef MDEBUG | |
+#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) | |
+#else | |
+#define MT_DEBUG(...) no_printk(__VA_ARGS__) | |
+#endif | |
+#ifdef RDEBUG | |
+#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) | |
+#else | |
+#define RB_DEBUG(...) no_printk(__VA_ARGS__) | |
+#endif | |
+#ifdef CDEBUG | |
+#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) | |
+#else | |
+#define CT_DEBUG(...) no_printk(__VA_ARGS__) | |
+#endif | |
+#ifdef DDEBUG | |
+#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) | |
+#else | |
+#define DR_DEBUG(...) no_printk(__VA_ARGS__) | |
+#endif | |
+ | |
+extern uint qtaguid_debug_mask; | |
+ | |
+/*---------------------------------------------------------------------------*/ | |
+/* | |
+ * Tags: | |
+ * | |
+ * They represent what the data usage counters will be tracked against. | |
+ * By default a tag is just based on the UID. | |
+ * The UID is used as the base for policing, and can not be ignored. | |
+ * So a tag will always at least represent a UID (uid_tag). | |
+ * | |
+ * A tag can be augmented with an "accounting tag" which is associated | |
+ * with a UID. | |
+ * User space can set the acct_tag portion of the tag which is then used | |
+ * with sockets: all data belonging to that socket will be counted against the | |
+ * tag. The policing is then based on the tag's uid_tag portion, | |
+ * and stats are collected for the acct_tag portion separately. | |
+ * | |
+ * There could be | |
+ * a: {acct_tag=1, uid_tag=10003} | |
+ * b: {acct_tag=2, uid_tag=10003} | |
+ * c: {acct_tag=3, uid_tag=10003} | |
+ * d: {acct_tag=0, uid_tag=10003} | |
+ * a, b, and c represent tags associated with specific sockets. | |
+ * d is for the totals for that uid, including all untagged traffic. | |
+ * Typically d is used with policing/quota rules. | |
+ * | |
+ * We want tag_t big enough to distinguish uid_t and acct_tag. | |
+ * It might become a struct if needed. | |
+ * Nothing should be using it as an int. | |
+ */ | |
+typedef uint64_t tag_t; /* Only used via accessors */ | |
+ | |
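+/* The upper 32 bits of a tag hold the acct_tag, the lower 32 bits the uid. */ | |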
+#define TAG_UID_MASK 0xFFFFFFFFULL | |
+#define TAG_ACCT_MASK (~0xFFFFFFFFULL) | |
+ | |
+static inline int tag_compare(tag_t t1, tag_t t2) | |
+{ | |
+ return t1 < t2 ? -1 : t1 == t2 ? 0 : 1; | |
+} | |
+ | |
+static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) | |
+{ | |
+ return acct_tag | uid; | |
+} | |
+static inline tag_t make_tag_from_uid(uid_t uid) | |
+{ | |
+ return uid; | |
+} | |
+static inline uid_t get_uid_from_tag(tag_t tag) | |
+{ | |
+ return tag & TAG_UID_MASK; | |
+} | |
+static inline tag_t get_utag_from_tag(tag_t tag) | |
+{ | |
+ return tag & TAG_UID_MASK; | |
+} | |
+static inline tag_t get_atag_from_tag(tag_t tag) | |
+{ | |
+ return tag & TAG_ACCT_MASK; | |
+} | |
+ | |
+static inline bool valid_atag(tag_t tag) | |
+{ | |
+ return !(tag & TAG_UID_MASK); | |
+} | |
+static inline tag_t make_atag_from_value(uint32_t value) | |
+{ | |
+ return (uint64_t)value << 32; | |
+} | |
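A minimal sketch of how these accessors compose, reusing the uid 10003 / acct_tag example from the comment above (illustrative only, not part of the patch):

	tag_t atag = make_atag_from_value(2);            /* 0x0000000200000000 */
	tag_t tag  = combine_atag_with_uid(atag, 10003); /* 0x0000000200002713 */
	/* get_uid_from_tag(tag)  == 10003 (low 32 bits)          */
	/* get_atag_from_tag(tag) == atag  (high 32 bits)         */
	/* get_utag_from_tag(tag) == make_tag_from_uid(10003)     */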
+/*---------------------------------------------------------------------------*/ | |
+ | |
+/* | |
+ * Maximum number of socket tags that a UID is allowed to have active. | |
+ * Multiple processes belonging to the same UID contribute towards this limit. | |
+ * Special UIDs that can impersonate a UID also contribute (e.g. download | |
+ * manager, ...) | |
+ */ | |
+#define DEFAULT_MAX_SOCK_TAGS 1024 | |
+ | |
+/* | |
+ * For now we only track 2 sets of counters. | |
+ * The default set is 0. | |
+ * Userspace can activate another set for a given uid being tracked. | |
+ */ | |
+#define IFS_MAX_COUNTER_SETS 2 | |
+ | |
+enum ifs_tx_rx { | |
+ IFS_TX, | |
+ IFS_RX, | |
+ IFS_MAX_DIRECTIONS | |
+}; | |
+ | |
+/* For now, TCP, UDP, the rest */ | |
+enum ifs_proto { | |
+ IFS_TCP, | |
+ IFS_UDP, | |
+ IFS_PROTO_OTHER, | |
+ IFS_MAX_PROTOS | |
+}; | |
+ | |
+struct byte_packet_counters { | |
+ uint64_t bytes; | |
+ uint64_t packets; | |
+}; | |
+ | |
+struct data_counters { | |
+ struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; | |
+}; | |
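Counters are indexed as bpc[counter_set][direction][protocol]. A hypothetical update for received TCP traffic on the default set 0, with dc and len standing in for locals (illustrative only, not part of the patch):

	dc->bpc[0][IFS_RX][IFS_TCP].bytes   += len;
	dc->bpc[0][IFS_RX][IFS_TCP].packets += 1;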
+ | |
+/* Generic X based nodes used as a base for rb_tree ops */ | |
+struct tag_node { | |
+ struct rb_node node; | |
+ tag_t tag; | |
+}; | |
+ | |
+struct tag_stat { | |
+ struct tag_node tn; | |
+ struct data_counters counters; | |
+ /* | |
+ * If this tag is acct_tag based, we need to count against the | |
+ * matching parent uid_tag. | |
+ */ | |
+ struct data_counters *parent_counters; | |
+}; | |
+ | |
+struct iface_stat { | |
+ struct list_head list; /* in iface_stat_list */ | |
+ char *ifname; | |
+ bool active; | |
+ /* net_dev is only valid for active iface_stat */ | |
+ struct net_device *net_dev; | |
+ | |
+ struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; | |
+ /* | |
+ * We keep the last_known, because some devices reset their counters | |
+ * just before NETDEV_UP, while some will reset just before | |
+ * NETDEV_REGISTER (which is more normal). | |
+ * So now, if the device didn't do a NETDEV_UNREGISTER and we see | |
+	 * its current dev stats smaller than what was previously known, we | |
+ * assume an UNREGISTER and just use the last_known. | |
+ */ | |
+ struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; | |
+ /* last_known is usable when last_known_valid is true */ | |
+ bool last_known_valid; | |
+ | |
+ struct proc_dir_entry *proc_ptr; | |
+ | |
+ struct rb_root tag_stat_tree; | |
+ spinlock_t tag_stat_list_lock; | |
+}; | |
+ | |
+/* This is needed to create proc_dir_entries from atomic context. */ | |
+struct iface_stat_work { | |
+ struct work_struct iface_work; | |
+ struct iface_stat *iface_entry; | |
+}; | |
+ | |
+/* | |
+ * Track tag that this socket is transferring data for, and not necessarily | |
+ * the uid that owns the socket. | |
+ * This is the tag against which tag_stat.counters will be billed. | |
+ * These structs need to be looked up by sock and pid. | |
+ */ | |
+struct sock_tag { | |
+ struct rb_node sock_node; | |
+ struct sock *sk; /* Only used as a number, never dereferenced */ | |
+ /* The socket is needed for sockfd_put() */ | |
+ struct socket *socket; | |
+ /* Used to associate with a given pid */ | |
+ struct list_head list; /* in proc_qtu_data.sock_tag_list */ | |
+ pid_t pid; | |
+ | |
+ tag_t tag; | |
+}; | |
+ | |
+struct qtaguid_event_counts { | |
+ /* Various successful events */ | |
+ atomic64_t sockets_tagged; | |
+ atomic64_t sockets_untagged; | |
+ atomic64_t counter_set_changes; | |
+ atomic64_t delete_cmds; | |
+ atomic64_t iface_events; /* Number of NETDEV_* events handled */ | |
+ | |
+ atomic64_t match_calls; /* Number of times iptables called mt */ | |
+ /* | |
+ * match_found_sk_*: numbers related to the netfilter matching | |
+ * function finding a sock for the sk_buff. | |
+ * Total skbs processed is sum(match_found*). | |
+ */ | |
+ atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ | |
+ /* The connection tracker had or didn't have the sk. */ | |
+ atomic64_t match_found_sk_in_ct; | |
+ atomic64_t match_found_no_sk_in_ct; | |
+ /* | |
+ * No sk could be found. No apparent owner. Could happen with | |
+ * unsolicited traffic. | |
+ */ | |
+ atomic64_t match_no_sk; | |
+ /* | |
+ * The file ptr in the sk_socket wasn't there. | |
+ * This might happen for traffic while the socket is being closed. | |
+ */ | |
+ atomic64_t match_no_sk_file; | |
+}; | |
+ | |
+/* Track the set active_set for the given tag. */ | |
+struct tag_counter_set { | |
+ struct tag_node tn; | |
+ int active_set; | |
+}; | |
+ | |
+/*----------------------------------------------*/ | |
+/* | |
+ * The qtu uid data is used to track resources that are created directly or | |
+ * indirectly by processes (uid tracked). | |
+ * It is shared by the processes with the same uid. | |
+ * Some of the resources will be counted to prevent further rogue allocations, | |
+ * some will need freeing once the owner process (uid) exits. | |
+ */ | |
+struct uid_tag_data { | |
+ struct rb_node node; | |
+ uid_t uid; | |
+ | |
+ /* | |
+ * For the uid, how many accounting tags have been set. | |
+ */ | |
+ int num_active_tags; | |
+ /* Track the number of proc_qtu_data that reference it */ | |
+ int num_pqd; | |
+ struct rb_root tag_ref_tree; | |
+ /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ | |
+}; | |
+ | |
+struct tag_ref { | |
+ struct tag_node tn; | |
+ | |
+ /* | |
+ * This tracks the number of active sockets that have a tag on them | |
+ * which matches this tag_ref.tn.tag. | |
+ * A tag ref can live on after the sockets are untagged. | |
+ * A tag ref can only be removed during a tag delete command. | |
+ */ | |
+ int num_sock_tags; | |
+}; | |
+ | |
+struct proc_qtu_data { | |
+ struct rb_node node; | |
+ pid_t pid; | |
+ | |
+ struct uid_tag_data *parent_tag_data; | |
+ | |
+ /* Tracks the sock_tags that need freeing upon this proc's death */ | |
+ struct list_head sock_tag_list; | |
+ /* No spinlock_t sock_tag_list_lock; use the global one. */ | |
+}; | |
+ | |
+/*----------------------------------------------*/ | |
+#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ | |
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c | |
new file mode 100644 | |
index 0000000..3917678 | |
--- /dev/null | |
+++ b/net/netfilter/xt_qtaguid_print.c | |
@@ -0,0 +1,556 @@ | |
+/* | |
+ * Pretty printing Support for iptables xt_qtaguid module. | |
+ * | |
+ * (C) 2011 Google, Inc | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+ | |
+/* | |
+ * Most of the functions in this file just waste time if DEBUG is not defined. | |
+ * The matching xt_qtaguid_print.h will static inline empty funcs if the needed | |
+ * debug flags are not defined. | |
+ * Those funcs that fail to allocate memory will panic as there is no need to | |
+ * hobble along just pretending to do the requested work. | |
+ */ | |
+ | |
+#define DEBUG | |
+ | |
+#include <linux/fs.h> | |
+#include <linux/gfp.h> | |
+#include <linux/net.h> | |
+#include <linux/rbtree.h> | |
+#include <linux/slab.h> | |
+#include <linux/spinlock_types.h> | |
+ | |
+ | |
+#include "xt_qtaguid_internal.h" | |
+#include "xt_qtaguid_print.h" | |
+ | |
+#ifdef DDEBUG | |
+ | |
+static void _bug_on_err_or_null(void *ptr) | |
+{ | |
+ if (IS_ERR_OR_NULL(ptr)) { | |
+ pr_err("qtaguid: kmalloc failed\n"); | |
+ BUG(); | |
+ } | |
+} | |
+ | |
+char *pp_tag_t(tag_t *tag) | |
+{ | |
+ char *res; | |
+ | |
+ if (!tag) | |
+ res = kasprintf(GFP_ATOMIC, "tag_t@null{}"); | |
+ else | |
+ res = kasprintf(GFP_ATOMIC, | |
+ "tag_t@%p{tag=0x%llx, uid=%u}", | |
+ tag, *tag, get_uid_from_tag(*tag)); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+} | |
+ | |
+char *pp_data_counters(struct data_counters *dc, bool showValues) | |
+{ | |
+ char *res; | |
+ | |
+ if (!dc) | |
+ res = kasprintf(GFP_ATOMIC, "data_counters@null{}"); | |
+ else if (showValues) | |
+ res = kasprintf( | |
+ GFP_ATOMIC, "data_counters@%p{" | |
+ "set0{" | |
+ "rx{" | |
+ "tcp{b=%llu, p=%llu}, " | |
+ "udp{b=%llu, p=%llu}," | |
+ "other{b=%llu, p=%llu}}, " | |
+ "tx{" | |
+ "tcp{b=%llu, p=%llu}, " | |
+ "udp{b=%llu, p=%llu}," | |
+ "other{b=%llu, p=%llu}}}, " | |
+ "set1{" | |
+ "rx{" | |
+ "tcp{b=%llu, p=%llu}, " | |
+ "udp{b=%llu, p=%llu}," | |
+ "other{b=%llu, p=%llu}}, " | |
+ "tx{" | |
+ "tcp{b=%llu, p=%llu}, " | |
+ "udp{b=%llu, p=%llu}," | |
+ "other{b=%llu, p=%llu}}}}", | |
+ dc, | |
+ dc->bpc[0][IFS_RX][IFS_TCP].bytes, | |
+ dc->bpc[0][IFS_RX][IFS_TCP].packets, | |
+ dc->bpc[0][IFS_RX][IFS_UDP].bytes, | |
+ dc->bpc[0][IFS_RX][IFS_UDP].packets, | |
+ dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes, | |
+ dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets, | |
+ dc->bpc[0][IFS_TX][IFS_TCP].bytes, | |
+ dc->bpc[0][IFS_TX][IFS_TCP].packets, | |
+ dc->bpc[0][IFS_TX][IFS_UDP].bytes, | |
+ dc->bpc[0][IFS_TX][IFS_UDP].packets, | |
+ dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes, | |
+ dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets, | |
+ dc->bpc[1][IFS_RX][IFS_TCP].bytes, | |
+ dc->bpc[1][IFS_RX][IFS_TCP].packets, | |
+ dc->bpc[1][IFS_RX][IFS_UDP].bytes, | |
+ dc->bpc[1][IFS_RX][IFS_UDP].packets, | |
+ dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes, | |
+ dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets, | |
+ dc->bpc[1][IFS_TX][IFS_TCP].bytes, | |
+ dc->bpc[1][IFS_TX][IFS_TCP].packets, | |
+ dc->bpc[1][IFS_TX][IFS_UDP].bytes, | |
+ dc->bpc[1][IFS_TX][IFS_UDP].packets, | |
+ dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes, | |
+ dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets); | |
+ else | |
+ res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+} | |
+ | |
+char *pp_tag_node(struct tag_node *tn) | |
+{ | |
+ char *tag_str; | |
+ char *res; | |
+ | |
+ if (!tn) { | |
+ res = kasprintf(GFP_ATOMIC, "tag_node@null{}"); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+ } | |
+ tag_str = pp_tag_t(&tn->tag); | |
+ res = kasprintf(GFP_ATOMIC, | |
+ "tag_node@%p{tag=%s}", | |
+ tn, tag_str); | |
+ _bug_on_err_or_null(res); | |
+ kfree(tag_str); | |
+ return res; | |
+} | |
+ | |
+char *pp_tag_ref(struct tag_ref *tr) | |
+{ | |
+ char *tn_str; | |
+ char *res; | |
+ | |
+ if (!tr) { | |
+ res = kasprintf(GFP_ATOMIC, "tag_ref@null{}"); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+ } | |
+ tn_str = pp_tag_node(&tr->tn); | |
+ res = kasprintf(GFP_ATOMIC, | |
+ "tag_ref@%p{%s, num_sock_tags=%d}", | |
+ tr, tn_str, tr->num_sock_tags); | |
+ _bug_on_err_or_null(res); | |
+ kfree(tn_str); | |
+ return res; | |
+} | |
+ | |
+char *pp_tag_stat(struct tag_stat *ts) | |
+{ | |
+ char *tn_str; | |
+ char *counters_str; | |
+ char *parent_counters_str; | |
+ char *res; | |
+ | |
+ if (!ts) { | |
+ res = kasprintf(GFP_ATOMIC, "tag_stat@null{}"); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+ } | |
+ tn_str = pp_tag_node(&ts->tn); | |
+ counters_str = pp_data_counters(&ts->counters, true); | |
+ parent_counters_str = pp_data_counters(ts->parent_counters, false); | |
+ res = kasprintf(GFP_ATOMIC, | |
+ "tag_stat@%p{%s, counters=%s, parent_counters=%s}", | |
+ ts, tn_str, counters_str, parent_counters_str); | |
+ _bug_on_err_or_null(res); | |
+ kfree(tn_str); | |
+ kfree(counters_str); | |
+ kfree(parent_counters_str); | |
+ return res; | |
+} | |
+ | |
+char *pp_iface_stat(struct iface_stat *is) | |
+{ | |
+ char *res; | |
+ if (!is) | |
+ res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); | |
+ else | |
+ res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" | |
+ "list=list_head{...}, " | |
+ "ifname=%s, " | |
+ "total={rx={bytes=%llu, " | |
+ "packets=%llu}, " | |
+ "tx={bytes=%llu, " | |
+ "packets=%llu}}, " | |
+ "last_known_valid=%d, " | |
+ "last_known={rx={bytes=%llu, " | |
+ "packets=%llu}, " | |
+ "tx={bytes=%llu, " | |
+ "packets=%llu}}, " | |
+ "active=%d, " | |
+ "net_dev=%p, " | |
+ "proc_ptr=%p, " | |
+ "tag_stat_tree=rb_root{...}}", | |
+ is, | |
+ is->ifname, | |
+ is->totals[IFS_RX].bytes, | |
+ is->totals[IFS_RX].packets, | |
+ is->totals[IFS_TX].bytes, | |
+ is->totals[IFS_TX].packets, | |
+ is->last_known_valid, | |
+ is->last_known[IFS_RX].bytes, | |
+ is->last_known[IFS_RX].packets, | |
+ is->last_known[IFS_TX].bytes, | |
+ is->last_known[IFS_TX].packets, | |
+ is->active, | |
+ is->net_dev, | |
+ is->proc_ptr); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+} | |
+ | |
+char *pp_sock_tag(struct sock_tag *st) | |
+{ | |
+ char *tag_str; | |
+ char *res; | |
+ | |
+ if (!st) { | |
+ res = kasprintf(GFP_ATOMIC, "sock_tag@null{}"); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+ } | |
+ tag_str = pp_tag_t(&st->tag); | |
+ res = kasprintf(GFP_ATOMIC, "sock_tag@%p{" | |
+ "sock_node=rb_node{...}, " | |
+ "sk=%p socket=%p (f_count=%lu), list=list_head{...}, " | |
+ "pid=%u, tag=%s}", | |
+ st, st->sk, st->socket, atomic_long_read( | |
+ &st->socket->file->f_count), | |
+ st->pid, tag_str); | |
+ _bug_on_err_or_null(res); | |
+ kfree(tag_str); | |
+ return res; | |
+} | |
+ | |
+char *pp_uid_tag_data(struct uid_tag_data *utd) | |
+{ | |
+ char *res; | |
+ | |
+ if (!utd) | |
+ res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}"); | |
+ else | |
+ res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{" | |
+ "uid=%u, num_active_acct_tags=%d, " | |
+ "num_pqd=%d, " | |
+ "tag_node_tree=rb_root{...}, " | |
+ "proc_qtu_data_tree=rb_root{...}}", | |
+ utd, utd->uid, | |
+ utd->num_active_tags, utd->num_pqd); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+} | |
+ | |
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd) | |
+{ | |
+ char *parent_tag_data_str; | |
+ char *res; | |
+ | |
+ if (!pqd) { | |
+ res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}"); | |
+ _bug_on_err_or_null(res); | |
+ return res; | |
+ } | |
+ parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data); | |
+ res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{" | |
+ "node=rb_node{...}, pid=%u, " | |
+ "parent_tag_data=%s, " | |
+ "sock_tag_list=list_head{...}}", | |
+ pqd, pqd->pid, parent_tag_data_str | |
+ ); | |
+ _bug_on_err_or_null(res); | |
+ kfree(parent_tag_data_str); | |
+ return res; | |
+} | |
+ | |
+/*------------------------------------------*/ | |
+void prdebug_sock_tag_tree(int indent_level, | |
+ struct rb_root *sock_tag_tree) | |
+{ | |
+ struct rb_node *node; | |
+ struct sock_tag *sock_tag_entry; | |
+ char *str; | |
+ | |
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
+ return; | |
+ | |
+ if (RB_EMPTY_ROOT(sock_tag_tree)) { | |
+ str = "sock_tag_tree=rb_root{}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ return; | |
+ } | |
+ | |
+ str = "sock_tag_tree=rb_root{"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ indent_level++; | |
+ for (node = rb_first(sock_tag_tree); | |
+ node; | |
+ node = rb_next(node)) { | |
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); | |
+ str = pp_sock_tag(sock_tag_entry); | |
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | |
+ kfree(str); | |
+ } | |
+ indent_level--; | |
+ str = "}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+} | |
+ | |
+void prdebug_sock_tag_list(int indent_level, | |
+ struct list_head *sock_tag_list) | |
+{ | |
+ struct sock_tag *sock_tag_entry; | |
+ char *str; | |
+ | |
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
+ return; | |
+ | |
+ if (list_empty(sock_tag_list)) { | |
+ str = "sock_tag_list=list_head{}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ return; | |
+ } | |
+ | |
+ str = "sock_tag_list=list_head{"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ indent_level++; | |
+ list_for_each_entry(sock_tag_entry, sock_tag_list, list) { | |
+ str = pp_sock_tag(sock_tag_entry); | |
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | |
+ kfree(str); | |
+ } | |
+ indent_level--; | |
+ str = "}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+} | |
+ | |
+void prdebug_proc_qtu_data_tree(int indent_level, | |
+ struct rb_root *proc_qtu_data_tree) | |
+{ | |
+ char *str; | |
+ struct rb_node *node; | |
+ struct proc_qtu_data *proc_qtu_data_entry; | |
+ | |
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
+ return; | |
+ | |
+ if (RB_EMPTY_ROOT(proc_qtu_data_tree)) { | |
+ str = "proc_qtu_data_tree=rb_root{}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ return; | |
+ } | |
+ | |
+ str = "proc_qtu_data_tree=rb_root{"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ indent_level++; | |
+ for (node = rb_first(proc_qtu_data_tree); | |
+ node; | |
+ node = rb_next(node)) { | |
+ proc_qtu_data_entry = rb_entry(node, | |
+ struct proc_qtu_data, | |
+ node); | |
+ str = pp_proc_qtu_data(proc_qtu_data_entry); | |
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, | |
+ str); | |
+ kfree(str); | |
+ indent_level++; | |
+ prdebug_sock_tag_list(indent_level, | |
+ &proc_qtu_data_entry->sock_tag_list); | |
+ indent_level--; | |
+ | |
+ } | |
+ indent_level--; | |
+ str = "}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+} | |
+ | |
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) | |
+{ | |
+ char *str; | |
+ struct rb_node *node; | |
+ struct tag_ref *tag_ref_entry; | |
+ | |
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
+ return; | |
+ | |
+ if (RB_EMPTY_ROOT(tag_ref_tree)) { | |
+ str = "tag_ref_tree{}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ return; | |
+ } | |
+ | |
+ str = "tag_ref_tree{"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ indent_level++; | |
+ for (node = rb_first(tag_ref_tree); | |
+ node; | |
+ node = rb_next(node)) { | |
+ tag_ref_entry = rb_entry(node, | |
+ struct tag_ref, | |
+ tn.node); | |
+ str = pp_tag_ref(tag_ref_entry); | |
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, | |
+ str); | |
+ kfree(str); | |
+ } | |
+ indent_level--; | |
+ str = "}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+} | |
+ | |
+void prdebug_uid_tag_data_tree(int indent_level, | |
+ struct rb_root *uid_tag_data_tree) | |
+{ | |
+ char *str; | |
+ struct rb_node *node; | |
+ struct uid_tag_data *uid_tag_data_entry; | |
+ | |
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
+ return; | |
+ | |
+ if (RB_EMPTY_ROOT(uid_tag_data_tree)) { | |
+ str = "uid_tag_data_tree=rb_root{}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ return; | |
+ } | |
+ | |
+ str = "uid_tag_data_tree=rb_root{"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ indent_level++; | |
+ for (node = rb_first(uid_tag_data_tree); | |
+ node; | |
+ node = rb_next(node)) { | |
+ uid_tag_data_entry = rb_entry(node, struct uid_tag_data, | |
+ node); | |
+ str = pp_uid_tag_data(uid_tag_data_entry); | |
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | |
+ kfree(str); | |
+ if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) { | |
+ indent_level++; | |
+ prdebug_tag_ref_tree(indent_level, | |
+ &uid_tag_data_entry->tag_ref_tree); | |
+ indent_level--; | |
+ } | |
+ } | |
+ indent_level--; | |
+ str = "}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+} | |
+ | |
+void prdebug_tag_stat_tree(int indent_level, | |
+ struct rb_root *tag_stat_tree) | |
+{ | |
+ char *str; | |
+ struct rb_node *node; | |
+ struct tag_stat *ts_entry; | |
+ | |
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
+ return; | |
+ | |
+ if (RB_EMPTY_ROOT(tag_stat_tree)) { | |
+ str = "tag_stat_tree{}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ return; | |
+ } | |
+ | |
+ str = "tag_stat_tree{"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ indent_level++; | |
+ for (node = rb_first(tag_stat_tree); | |
+ node; | |
+ node = rb_next(node)) { | |
+ ts_entry = rb_entry(node, struct tag_stat, tn.node); | |
+ str = pp_tag_stat(ts_entry); | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, | |
+ str); | |
+ kfree(str); | |
+ } | |
+ indent_level--; | |
+ str = "}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+} | |
+ | |
+void prdebug_iface_stat_list(int indent_level, | |
+ struct list_head *iface_stat_list) | |
+{ | |
+ char *str; | |
+ struct iface_stat *iface_entry; | |
+ | |
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
+ return; | |
+ | |
+ if (list_empty(iface_stat_list)) { | |
+ str = "iface_stat_list=list_head{}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ return; | |
+ } | |
+ | |
+ str = "iface_stat_list=list_head{"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ indent_level++; | |
+ list_for_each_entry(iface_entry, iface_stat_list, list) { | |
+ str = pp_iface_stat(iface_entry); | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+ kfree(str); | |
+ | |
+ spin_lock_bh(&iface_entry->tag_stat_list_lock); | |
+ if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) { | |
+ indent_level++; | |
+ prdebug_tag_stat_tree(indent_level, | |
+ &iface_entry->tag_stat_tree); | |
+ indent_level--; | |
+ } | |
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock); | |
+ } | |
+ indent_level--; | |
+ str = "}"; | |
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | |
+} | |
+ | |
+#endif /* ifdef DDEBUG */ | |
+/*------------------------------------------*/ | |
+static const char * const netdev_event_strings[] = { | |
+ "netdev_unknown", | |
+ "NETDEV_UP", | |
+ "NETDEV_DOWN", | |
+ "NETDEV_REBOOT", | |
+ "NETDEV_CHANGE", | |
+ "NETDEV_REGISTER", | |
+ "NETDEV_UNREGISTER", | |
+ "NETDEV_CHANGEMTU", | |
+ "NETDEV_CHANGEADDR", | |
+ "NETDEV_GOING_DOWN", | |
+ "NETDEV_CHANGENAME", | |
+ "NETDEV_FEAT_CHANGE", | |
+ "NETDEV_BONDING_FAILOVER", | |
+ "NETDEV_PRE_UP", | |
+ "NETDEV_PRE_TYPE_CHANGE", | |
+ "NETDEV_POST_TYPE_CHANGE", | |
+ "NETDEV_POST_INIT", | |
+ "NETDEV_UNREGISTER_BATCH", | |
+ "NETDEV_RELEASE", | |
+ "NETDEV_NOTIFY_PEERS", | |
+ "NETDEV_JOIN", | |
+}; | |
+ | |
+const char *netdev_evt_str(int netdev_event) | |
+{ | |
+ if (netdev_event < 0 | |
+ || netdev_event >= ARRAY_SIZE(netdev_event_strings)) | |
+ return "bad event num"; | |
+ return netdev_event_strings[netdev_event]; | |
+} | |
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h | |
new file mode 100644 | |
index 0000000..b63871a | |
--- /dev/null | |
+++ b/net/netfilter/xt_qtaguid_print.h | |
@@ -0,0 +1,120 @@ | |
+/* | |
+ * Pretty printing Support for iptables xt_qtaguid module. | |
+ * | |
+ * (C) 2011 Google, Inc | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#ifndef __XT_QTAGUID_PRINT_H__ | |
+#define __XT_QTAGUID_PRINT_H__ | |
+ | |
+#include "xt_qtaguid_internal.h" | |
+ | |
+#ifdef DDEBUG | |
+ | |
+char *pp_tag_t(tag_t *tag); | |
+char *pp_data_counters(struct data_counters *dc, bool showValues); | |
+char *pp_tag_node(struct tag_node *tn); | |
+char *pp_tag_ref(struct tag_ref *tr); | |
+char *pp_tag_stat(struct tag_stat *ts); | |
+char *pp_iface_stat(struct iface_stat *is); | |
+char *pp_sock_tag(struct sock_tag *st); | |
+char *pp_uid_tag_data(struct uid_tag_data *qtd); | |
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd); | |
+ | |
+/*------------------------------------------*/ | |
+void prdebug_sock_tag_list(int indent_level, | |
+ struct list_head *sock_tag_list); | |
+void prdebug_sock_tag_tree(int indent_level, | |
+ struct rb_root *sock_tag_tree); | |
+void prdebug_proc_qtu_data_tree(int indent_level, | |
+ struct rb_root *proc_qtu_data_tree); | |
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree); | |
+void prdebug_uid_tag_data_tree(int indent_level, | |
+ struct rb_root *uid_tag_data_tree); | |
+void prdebug_tag_stat_tree(int indent_level, | |
+ struct rb_root *tag_stat_tree); | |
+void prdebug_iface_stat_list(int indent_level, | |
+ struct list_head *iface_stat_list); | |
+ | |
+#else | |
+ | |
+/*------------------------------------------*/ | |
+static inline char *pp_tag_t(tag_t *tag) | |
+{ | |
+ return NULL; | |
+} | |
+static inline char *pp_data_counters(struct data_counters *dc, bool showValues) | |
+{ | |
+ return NULL; | |
+} | |
+static inline char *pp_tag_node(struct tag_node *tn) | |
+{ | |
+ return NULL; | |
+} | |
+static inline char *pp_tag_ref(struct tag_ref *tr) | |
+{ | |
+ return NULL; | |
+} | |
+static inline char *pp_tag_stat(struct tag_stat *ts) | |
+{ | |
+ return NULL; | |
+} | |
+static inline char *pp_iface_stat(struct iface_stat *is) | |
+{ | |
+ return NULL; | |
+} | |
+static inline char *pp_sock_tag(struct sock_tag *st) | |
+{ | |
+ return NULL; | |
+} | |
+static inline char *pp_uid_tag_data(struct uid_tag_data *qtd) | |
+{ | |
+ return NULL; | |
+} | |
+static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd) | |
+{ | |
+ return NULL; | |
+} | |
+ | |
+/*------------------------------------------*/ | |
+static inline | |
+void prdebug_sock_tag_list(int indent_level, | |
+ struct list_head *sock_tag_list) | |
+{ | |
+} | |
+static inline | |
+void prdebug_sock_tag_tree(int indent_level, | |
+ struct rb_root *sock_tag_tree) | |
+{ | |
+} | |
+static inline | |
+void prdebug_proc_qtu_data_tree(int indent_level, | |
+ struct rb_root *proc_qtu_data_tree) | |
+{ | |
+} | |
+static inline | |
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) | |
+{ | |
+} | |
+static inline | |
+void prdebug_uid_tag_data_tree(int indent_level, | |
+ struct rb_root *uid_tag_data_tree) | |
+{ | |
+} | |
+static inline | |
+void prdebug_tag_stat_tree(int indent_level, | |
+ struct rb_root *tag_stat_tree) | |
+{ | |
+} | |
+static inline | |
+void prdebug_iface_stat_list(int indent_level, | |
+ struct list_head *iface_stat_list) | |
+{ | |
+} | |
+#endif | |
+/*------------------------------------------*/ | |
+const char *netdev_evt_str(int netdev_event); | |
+#endif /* ifndef __XT_QTAGUID_PRINT_H__ */ | |
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c | |
new file mode 100644 | |
index 0000000..3c72bea | |
--- /dev/null | |
+++ b/net/netfilter/xt_quota2.c | |
@@ -0,0 +1,381 @@ | |
+/* | |
+ * xt_quota2 - enhanced xt_quota that can count upwards and in packets | |
+ * as a minimal accounting match. | |
+ * by Jan Engelhardt <[email protected]>, 2008 | |
+ * | |
+ * Originally based on xt_quota.c: | |
+ * netfilter module to enforce network quotas | |
+ * Sam Johnston <[email protected]> | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License; either | |
+ * version 2 of the License, as published by the Free Software Foundation. | |
+ */ | |
+#include <linux/list.h> | |
+#include <linux/proc_fs.h> | |
+#include <linux/skbuff.h> | |
+#include <linux/spinlock.h> | |
+#include <asm/atomic.h> | |
+ | |
+#include <linux/netfilter/x_tables.h> | |
+#include <linux/netfilter/xt_quota2.h> | |
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | |
+#include <linux/netfilter_ipv4/ipt_ULOG.h> | |
+#endif | |
+ | |
+/** | |
+ * @lock: lock to protect quota writers from each other | |
+ */ | |
+struct xt_quota_counter { | |
+ u_int64_t quota; | |
+ spinlock_t lock; | |
+ struct list_head list; | |
+ atomic_t ref; | |
+ char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)]; | |
+ struct proc_dir_entry *procfs_entry; | |
+}; | |
+ | |
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | |
+/* Harald's favorite number +1 :D From ipt_ULOG.C */ | |
+static int qlog_nl_event = 112; | |
+module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR); | |
+MODULE_PARM_DESC(event_num, | |
+ "Event number for NETLINK_NFLOG message. 0 disables log." | |
+ "111 is what ipt_ULOG uses."); | |
+static struct sock *nflognl; | |
+#endif | |
+ | |
+static LIST_HEAD(counter_list); | |
+static DEFINE_SPINLOCK(counter_list_lock); | |
+ | |
+static struct proc_dir_entry *proc_xt_quota; | |
+static unsigned int quota_list_perms = S_IRUGO | S_IWUSR; | |
+static unsigned int quota_list_uid = 0; | |
+static unsigned int quota_list_gid = 0; | |
+module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR); | |
+module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR); | |
+module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR); | |
+ | |
+ | |
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | |
+static void quota2_log(unsigned int hooknum, | |
+ const struct sk_buff *skb, | |
+ const struct net_device *in, | |
+ const struct net_device *out, | |
+ const char *prefix) | |
+{ | |
+ ulog_packet_msg_t *pm; | |
+ struct sk_buff *log_skb; | |
+ size_t size; | |
+ struct nlmsghdr *nlh; | |
+ | |
+ if (!qlog_nl_event) | |
+ return; | |
+ | |
+ size = NLMSG_SPACE(sizeof(*pm)); | |
+ size = max(size, (size_t)NLMSG_GOODSIZE); | |
+ log_skb = alloc_skb(size, GFP_ATOMIC); | |
+ if (!log_skb) { | |
+ pr_err("xt_quota2: cannot alloc skb for logging\n"); | |
+ return; | |
+ } | |
+ | |
+ /* NLMSG_PUT() uses "goto nlmsg_failure" */ | |
+ nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event, | |
+ sizeof(*pm)); | |
+ pm = NLMSG_DATA(nlh); | |
+ if (skb->tstamp.tv64 == 0) | |
+ __net_timestamp((struct sk_buff *)skb); | |
+ pm->data_len = 0; | |
+ pm->hook = hooknum; | |
+ if (prefix != NULL) | |
+ strlcpy(pm->prefix, prefix, sizeof(pm->prefix)); | |
+ else | |
+ *(pm->prefix) = '\0'; | |
+ if (in) | |
+ strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name)); | |
+ else | |
+ pm->indev_name[0] = '\0'; | |
+ | |
+ if (out) | |
+ strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); | |
+ else | |
+ pm->outdev_name[0] = '\0'; | |
+ | |
+ NETLINK_CB(log_skb).dst_group = 1; | |
+ pr_debug("throwing 1 packets to netlink group 1\n"); | |
+ netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC); | |
+ | |
+nlmsg_failure: /* Used within NLMSG_PUT() */ | |
+ pr_debug("xt_quota2: error during NLMSG_PUT\n"); | |
+} | |
+#else | |
+static void quota2_log(unsigned int hooknum, | |
+ const struct sk_buff *skb, | |
+ const struct net_device *in, | |
+ const struct net_device *out, | |
+ const char *prefix) | |
+{ | |
+} | |
+#endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */ | |
+ | |
+static int quota_proc_read(char *page, char **start, off_t offset, | |
+ int count, int *eof, void *data) | |
+{ | |
+ struct xt_quota_counter *e = data; | |
+ int ret; | |
+ | |
+ spin_lock_bh(&e->lock); | |
+ ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota); | |
+ spin_unlock_bh(&e->lock); | |
+ return ret; | |
+} | |
+ | |
+static int quota_proc_write(struct file *file, const char __user *input, | |
+ unsigned long size, void *data) | |
+{ | |
+ struct xt_quota_counter *e = data; | |
+ char buf[sizeof("18446744073709551616")]; | |
+ | |
+ if (size > sizeof(buf)) | |
+ size = sizeof(buf); | |
+ if (copy_from_user(buf, input, size) != 0) | |
+ return -EFAULT; | |
+ buf[sizeof(buf)-1] = '\0'; | |
+ | |
+ spin_lock_bh(&e->lock); | |
+ e->quota = simple_strtoull(buf, NULL, 0); | |
+ spin_unlock_bh(&e->lock); | |
+ return size; | |
+} | |
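Editorial note on the procfs interface these two handlers implement: since quota_mt2_init() below creates the directory with proc_mkdir("xt_quota", init_net.proc_net), each named counter should appear as /proc/net/xt_quota/<name> (the name "mobile_budget" below is hypothetical); reading the entry returns the current quota value and writing a decimal number resets it:

	/* Illustrative only:
	 *   cat  /proc/net/xt_quota/mobile_budget            -> remaining quota
	 *   echo 5000000 > /proc/net/xt_quota/mobile_budget  -> reset the budget
	 */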
+ | |
+static struct xt_quota_counter * | |
+q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon) | |
+{ | |
+ struct xt_quota_counter *e; | |
+ unsigned int size; | |
+ | |
+ /* Do not need all the procfs things for anonymous counters. */ | |
+ size = anon ? offsetof(typeof(*e), list) : sizeof(*e); | |
+ e = kmalloc(size, GFP_KERNEL); | |
+ if (e == NULL) | |
+ return NULL; | |
+ | |
+ e->quota = q->quota; | |
+ spin_lock_init(&e->lock); | |
+ if (!anon) { | |
+ INIT_LIST_HEAD(&e->list); | |
+ atomic_set(&e->ref, 1); | |
+ strlcpy(e->name, q->name, sizeof(e->name)); | |
+ } | |
+ return e; | |
+} | |
+ | |
+/** | |
+ * q2_get_counter - get ref to counter or create new | |
+ * @name: name of counter | |
+ */ | |
+static struct xt_quota_counter * | |
+q2_get_counter(const struct xt_quota_mtinfo2 *q) | |
+{ | |
+ struct proc_dir_entry *p; | |
+ struct xt_quota_counter *e = NULL; | |
+ struct xt_quota_counter *new_e; | |
+ | |
+ if (*q->name == '\0') | |
+ return q2_new_counter(q, true); | |
+ | |
+ /* No need to hold a lock while getting a new counter */ | |
+ new_e = q2_new_counter(q, false); | |
+ if (new_e == NULL) | |
+ goto out; | |
+ | |
+ spin_lock_bh(&counter_list_lock); | |
+ list_for_each_entry(e, &counter_list, list) | |
+ if (strcmp(e->name, q->name) == 0) { | |
+ atomic_inc(&e->ref); | |
+ spin_unlock_bh(&counter_list_lock); | |
+ kfree(new_e); | |
+ pr_debug("xt_quota2: old counter name=%s", e->name); | |
+ return e; | |
+ } | |
+ e = new_e; | |
+ pr_debug("xt_quota2: new_counter name=%s", e->name); | |
+ list_add_tail(&e->list, &counter_list); | |
+ /* The entry having a refcount of 1 is not directly destructible. | |
+ * This func has not yet returned the new entry, thus iptables | |
+	 * has no references for destroying this entry. | |
+	 * For another rule to try to destroy it, this func would first need | |
+	 * to be re-invoked to acquire a new ref for the same named quota. | |
+ * Nobody will access the e->procfs_entry either. | |
+ * So release the lock. */ | |
+ spin_unlock_bh(&counter_list_lock); | |
+ | |
+ /* create_proc_entry() is not spin_lock happy */ | |
+ p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms, | |
+ proc_xt_quota); | |
+ | |
+ if (IS_ERR_OR_NULL(p)) { | |
+ spin_lock_bh(&counter_list_lock); | |
+ list_del(&e->list); | |
+ spin_unlock_bh(&counter_list_lock); | |
+ goto out; | |
+ } | |
+ p->data = e; | |
+ p->read_proc = quota_proc_read; | |
+ p->write_proc = quota_proc_write; | |
+ p->uid = quota_list_uid; | |
+ p->gid = quota_list_gid; | |
+ return e; | |
+ | |
+ out: | |
+ kfree(e); | |
+ return NULL; | |
+} | |
+ | |
+static int quota_mt2_check(const struct xt_mtchk_param *par) | |
+{ | |
+ struct xt_quota_mtinfo2 *q = par->matchinfo; | |
+ | |
+ pr_debug("xt_quota2: check() flags=0x%04x", q->flags); | |
+ | |
+ if (q->flags & ~XT_QUOTA_MASK) | |
+ return -EINVAL; | |
+ | |
+ q->name[sizeof(q->name)-1] = '\0'; | |
+ if (*q->name == '.' || strchr(q->name, '/') != NULL) { | |
+ printk(KERN_ERR "xt_quota.3: illegal name\n"); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ q->master = q2_get_counter(q); | |
+ if (q->master == NULL) { | |
+ printk(KERN_ERR "xt_quota.3: memory alloc failure\n"); | |
+ return -ENOMEM; | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+static void quota_mt2_destroy(const struct xt_mtdtor_param *par) | |
+{ | |
+ struct xt_quota_mtinfo2 *q = par->matchinfo; | |
+ struct xt_quota_counter *e = q->master; | |
+ | |
+ if (*q->name == '\0') { | |
+ kfree(e); | |
+ return; | |
+ } | |
+ | |
+ spin_lock_bh(&counter_list_lock); | |
+ if (!atomic_dec_and_test(&e->ref)) { | |
+ spin_unlock_bh(&counter_list_lock); | |
+ return; | |
+ } | |
+ | |
+ list_del(&e->list); | |
+ remove_proc_entry(e->name, proc_xt_quota); | |
+ spin_unlock_bh(&counter_list_lock); | |
+ kfree(e); | |
+} | |
+ | |
+static bool | |
+quota_mt2(const struct sk_buff *skb, struct xt_action_param *par) | |
+{ | |
+ struct xt_quota_mtinfo2 *q = (void *)par->matchinfo; | |
+ struct xt_quota_counter *e = q->master; | |
+ bool ret = q->flags & XT_QUOTA_INVERT; | |
+ | |
+ spin_lock_bh(&e->lock); | |
+ if (q->flags & XT_QUOTA_GROW) { | |
+ /* | |
+ * While no_change is pointless in "grow" mode, we will | |
+ * implement it here simply to have a consistent behavior. | |
+ */ | |
+ if (!(q->flags & XT_QUOTA_NO_CHANGE)) { | |
+ e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; | |
+ } | |
+ ret = true; | |
+ } else { | |
+ if (e->quota >= skb->len) { | |
+ if (!(q->flags & XT_QUOTA_NO_CHANGE)) | |
+ e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; | |
+ ret = !ret; | |
+ } else { | |
+ /* We are transitioning, log that fact. */ | |
+ if (e->quota) { | |
+ quota2_log(par->hooknum, | |
+ skb, | |
+ par->in, | |
+ par->out, | |
+ q->name); | |
+ } | |
+ /* we do not allow even small packets from now on */ | |
+ e->quota = 0; | |
+ } | |
+ } | |
+ spin_unlock_bh(&e->lock); | |
+ return ret; | |
+} | |
+ | |
+static struct xt_match quota_mt2_reg[] __read_mostly = { | |
+ { | |
+ .name = "quota2", | |
+ .revision = 3, | |
+ .family = NFPROTO_IPV4, | |
+ .checkentry = quota_mt2_check, | |
+ .match = quota_mt2, | |
+ .destroy = quota_mt2_destroy, | |
+ .matchsize = sizeof(struct xt_quota_mtinfo2), | |
+ .me = THIS_MODULE, | |
+ }, | |
+ { | |
+ .name = "quota2", | |
+ .revision = 3, | |
+ .family = NFPROTO_IPV6, | |
+ .checkentry = quota_mt2_check, | |
+ .match = quota_mt2, | |
+ .destroy = quota_mt2_destroy, | |
+ .matchsize = sizeof(struct xt_quota_mtinfo2), | |
+ .me = THIS_MODULE, | |
+ }, | |
+}; | |
+ | |
+static int __init quota_mt2_init(void) | |
+{ | |
+ int ret; | |
+ pr_debug("xt_quota2: init()"); | |
+ | |
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | |
+ nflognl = netlink_kernel_create(&init_net, | |
+ NETLINK_NFLOG, 1, NULL, | |
+ NULL, THIS_MODULE); | |
+ if (!nflognl) | |
+ return -ENOMEM; | |
+#endif | |
+ | |
+ proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net); | |
+ if (proc_xt_quota == NULL) | |
+ return -EACCES; | |
+ | |
+ ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); | |
+ if (ret < 0) | |
+ remove_proc_entry("xt_quota", init_net.proc_net); | |
+ pr_debug("xt_quota2: init() %d", ret); | |
+ return ret; | |
+} | |
+ | |
+static void __exit quota_mt2_exit(void) | |
+{ | |
+ xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); | |
+ remove_proc_entry("xt_quota", init_net.proc_net); | |
+} | |
+ | |
+module_init(quota_mt2_init); | |
+module_exit(quota_mt2_exit); | |
+MODULE_DESCRIPTION("Xtables: countdown quota match; up counter"); | |
+MODULE_AUTHOR("Sam Johnston <[email protected]>"); | |
+MODULE_AUTHOR("Jan Engelhardt <[email protected]>"); | |
+MODULE_LICENSE("GPL"); | |
+MODULE_ALIAS("ipt_quota2"); | |
+MODULE_ALIAS("ip6t_quota2"); | |
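Editorial note, not part of the patch: the two registrations above expose the same revision-3 "quota2" match for both IPv4 and IPv6. Assuming the matching userspace extension (as shipped in Xtables-addons) with its --name/--quota options, a rule along the lines of `iptables -A OUTPUT -m quota2 --name mobile_budget --quota 5000000` would decrement the named counter per byte until it reaches zero, after which the match stops succeeding; this is presumably how the framework's bandwidth controller enforces a mobile data cap on top of this kernel support.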
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c | |
index 3d54c23..cea16e9 100644 | |
--- a/net/netfilter/xt_socket.c | |
+++ b/net/netfilter/xt_socket.c | |
@@ -22,6 +22,11 @@ | |
#include <net/netfilter/nf_tproxy_core.h> | |
#include <net/netfilter/ipv4/nf_defrag_ipv4.h> | |
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) | |
+#define XT_SOCKET_HAVE_IPV6 1 | |
+#include <linux/netfilter_ipv6/ip6_tables.h> | |
+#endif | |
+ | |
#include <linux/netfilter/xt_socket.h> | |
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | |
@@ -29,8 +34,18 @@ | |
#include <net/netfilter/nf_conntrack.h> | |
#endif | |
+void | |
+xt_socket_put_sk(struct sock *sk) | |
+{ | |
+ if (sk->sk_state == TCP_TIME_WAIT) | |
+ inet_twsk_put(inet_twsk(sk)); | |
+ else | |
+ sock_put(sk); | |
+} | |
+EXPORT_SYMBOL(xt_socket_put_sk); | |
+ | |
static int | |
-extract_icmp_fields(const struct sk_buff *skb, | |
+extract_icmp4_fields(const struct sk_buff *skb, | |
u8 *protocol, | |
__be32 *raddr, | |
__be32 *laddr, | |
@@ -86,10 +101,8 @@ extract_icmp_fields(const struct sk_buff *skb, | |
return 0; | |
} | |
- | |
-static bool | |
-socket_match(const struct sk_buff *skb, struct xt_action_param *par, | |
- const struct xt_socket_mtinfo1 *info) | |
+struct sock* | |
+xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par) | |
{ | |
const struct iphdr *iph = ip_hdr(skb); | |
struct udphdr _hdr, *hp = NULL; | |
@@ -106,7 +119,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, | |
hp = skb_header_pointer(skb, ip_hdrlen(skb), | |
sizeof(_hdr), &_hdr); | |
if (hp == NULL) | |
- return false; | |
+ return NULL; | |
protocol = iph->protocol; | |
saddr = iph->saddr; | |
@@ -115,11 +128,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, | |
dport = hp->dest; | |
} else if (iph->protocol == IPPROTO_ICMP) { | |
- if (extract_icmp_fields(skb, &protocol, &saddr, &daddr, | |
+ if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, | |
&sport, &dport)) | |
- return false; | |
+ return NULL; | |
} else { | |
- return false; | |
+ return NULL; | |
} | |
#ifdef XT_SOCKET_HAVE_CONNTRACK | |
@@ -130,7 +143,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, | |
if (ct && (ct != &nf_conntrack_untracked) && | |
((iph->protocol != IPPROTO_ICMP && | |
ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || | |
- (iph->protocol == IPPROTO_ICMP && | |
+ (iph->protocol == IPPROTO_ICMP && | |
ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && | |
(ct->status & IPS_SRC_NAT_DONE)) { | |
@@ -142,7 +155,24 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, | |
#endif | |
sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, | |
- saddr, daddr, sport, dport, par->in, false); | |
+ saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); | |
+ | |
+ pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", | |
+ protocol, &saddr, ntohs(sport), | |
+ &daddr, ntohs(dport), | |
+ &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); | |
+ | |
+ return sk; | |
+} | |
+EXPORT_SYMBOL(xt_socket_get4_sk); | |
+ | |
+static bool | |
+socket_match(const struct sk_buff *skb, struct xt_action_param *par, | |
+ const struct xt_socket_mtinfo1 *info) | |
+{ | |
+ struct sock *sk; | |
+ | |
+ sk = xt_socket_get4_sk(skb, par); | |
if (sk != NULL) { | |
bool wildcard; | |
bool transparent = true; | |
@@ -159,38 +189,167 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, | |
(sk->sk_state == TCP_TIME_WAIT && | |
inet_twsk(sk)->tw_transparent)); | |
- nf_tproxy_put_sock(sk); | |
+ xt_socket_put_sk(sk); | |
if (wildcard || !transparent) | |
sk = NULL; | |
} | |
- pr_debug("proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n", | |
- protocol, ntohl(saddr), ntohs(sport), | |
- ntohl(daddr), ntohs(dport), | |
- ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk); | |
- | |
return (sk != NULL); | |
} | |
static bool | |
-socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) | |
+socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) | |
{ | |
return socket_match(skb, par, NULL); | |
} | |
static bool | |
-socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) | |
+socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par) | |
{ | |
return socket_match(skb, par, par->matchinfo); | |
} | |
+#ifdef XT_SOCKET_HAVE_IPV6 | |
+ | |
+static int | |
+extract_icmp6_fields(const struct sk_buff *skb, | |
+ unsigned int outside_hdrlen, | |
+ int *protocol, | |
+ struct in6_addr **raddr, | |
+ struct in6_addr **laddr, | |
+ __be16 *rport, | |
+ __be16 *lport) | |
+{ | |
+ struct ipv6hdr *inside_iph, _inside_iph; | |
+ struct icmp6hdr *icmph, _icmph; | |
+ __be16 *ports, _ports[2]; | |
+ u8 inside_nexthdr; | |
+ int inside_hdrlen; | |
+ | |
+ icmph = skb_header_pointer(skb, outside_hdrlen, | |
+ sizeof(_icmph), &_icmph); | |
+ if (icmph == NULL) | |
+ return 1; | |
+ | |
+ if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) | |
+ return 1; | |
+ | |
+ inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph); | |
+ if (inside_iph == NULL) | |
+ return 1; | |
+ inside_nexthdr = inside_iph->nexthdr; | |
+ | |
+ inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), &inside_nexthdr); | |
+ if (inside_hdrlen < 0) | |
+ return 1; /* hjm: Packet has no/incomplete transport layer headers. */ | |
+ | |
+ if (inside_nexthdr != IPPROTO_TCP && | |
+ inside_nexthdr != IPPROTO_UDP) | |
+ return 1; | |
+ | |
+ ports = skb_header_pointer(skb, inside_hdrlen, | |
+ sizeof(_ports), &_ports); | |
+ if (ports == NULL) | |
+ return 1; | |
+ | |
+ /* the inside IP packet is the one quoted from our side, thus | |
+ * its saddr is the local address */ | |
+ *protocol = inside_nexthdr; | |
+ *laddr = &inside_iph->saddr; | |
+ *lport = ports[0]; | |
+ *raddr = &inside_iph->daddr; | |
+ *rport = ports[1]; | |
+ | |
+ return 0; | |
+} | |
+ | |
+struct sock* | |
+xt_socket_get6_sk(const struct sk_buff *skb, struct xt_action_param *par) | |
+{ | |
+ struct ipv6hdr *iph = ipv6_hdr(skb); | |
+ struct udphdr _hdr, *hp = NULL; | |
+ struct sock *sk; | |
+ struct in6_addr *daddr, *saddr; | |
+ __be16 dport, sport; | |
+ int thoff, tproto; | |
+ | |
+ tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); | |
+ if (tproto < 0) { | |
+ pr_debug("unable to find transport header in IPv6 packet, dropping\n"); | |
+ return NF_DROP; | |
+ } | |
+ | |
+ if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { | |
+ hp = skb_header_pointer(skb, thoff, | |
+ sizeof(_hdr), &_hdr); | |
+ if (hp == NULL) | |
+ return NULL; | |
+ | |
+ saddr = &iph->saddr; | |
+ sport = hp->source; | |
+ daddr = &iph->daddr; | |
+ dport = hp->dest; | |
+ | |
+ } else if (tproto == IPPROTO_ICMPV6) { | |
+ if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, | |
+ &sport, &dport)) | |
+ return NULL; | |
+ } else { | |
+ return NULL; | |
+ } | |
+ | |
+ sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, | |
+ saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); | |
+ pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " | |
+ "(orig %pI6:%hu) sock %p\n", | |
+ tproto, saddr, ntohs(sport), | |
+ daddr, ntohs(dport), | |
+ &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); | |
+ return sk; | |
+} | |
+EXPORT_SYMBOL(xt_socket_get6_sk); | |
+ | |
+static bool | |
+socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) | |
+{ | |
+ struct sock *sk; | |
+ const struct xt_socket_mtinfo1 *info; | |
+ | |
+ info = (struct xt_socket_mtinfo1 *) par->matchinfo; | |
+ sk = xt_socket_get6_sk(skb, par); | |
+ if (sk != NULL) { | |
+ bool wildcard; | |
+ bool transparent = true; | |
+ | |
+ /* Ignore sockets listening on INADDR_ANY */ | |
+ wildcard = (sk->sk_state != TCP_TIME_WAIT && | |
+ ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); | |
+ | |
+ /* Ignore non-transparent sockets, | |
+ if XT_SOCKET_TRANSPARENT is used */ | |
+ if (info && info->flags & XT_SOCKET_TRANSPARENT) | |
+ transparent = ((sk->sk_state != TCP_TIME_WAIT && | |
+ inet_sk(sk)->transparent) || | |
+ (sk->sk_state == TCP_TIME_WAIT && | |
+ inet_twsk(sk)->tw_transparent)); | |
+ | |
+ xt_socket_put_sk(sk); | |
+ | |
+ if (wildcard || !transparent) | |
+ sk = NULL; | |
+ } | |
+ | |
+ return (sk != NULL); | |
+} | |
+#endif | |
+ | |
static struct xt_match socket_mt_reg[] __read_mostly = { | |
{ | |
.name = "socket", | |
.revision = 0, | |
.family = NFPROTO_IPV4, | |
- .match = socket_mt_v0, | |
+ .match = socket_mt4_v0, | |
.hooks = (1 << NF_INET_PRE_ROUTING) | | |
(1 << NF_INET_LOCAL_IN), | |
.me = THIS_MODULE, | |
@@ -199,12 +358,24 @@ static struct xt_match socket_mt_reg[] __read_mostly = { | |
.name = "socket", | |
.revision = 1, | |
.family = NFPROTO_IPV4, | |
- .match = socket_mt_v1, | |
+ .match = socket_mt4_v1, | |
.matchsize = sizeof(struct xt_socket_mtinfo1), | |
.hooks = (1 << NF_INET_PRE_ROUTING) | | |
(1 << NF_INET_LOCAL_IN), | |
.me = THIS_MODULE, | |
}, | |
+#ifdef XT_SOCKET_HAVE_IPV6 | |
+ { | |
+ .name = "socket", | |
+ .revision = 1, | |
+ .family = NFPROTO_IPV6, | |
+ .match = socket_mt6_v1, | |
+ .matchsize = sizeof(struct xt_socket_mtinfo1), | |
+ .hooks = (1 << NF_INET_PRE_ROUTING) | | |
+ (1 << NF_INET_LOCAL_IN), | |
+ .me = THIS_MODULE, | |
+ }, | |
+#endif | |
}; | |
static int __init socket_mt_init(void) | |
@@ -225,3 +396,4 @@ MODULE_LICENSE("GPL"); | |
MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); | |
MODULE_DESCRIPTION("x_tables socket match module"); | |
MODULE_ALIAS("ipt_socket"); | |
+MODULE_ALIAS("ip6t_socket"); |