Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
2005-04-16 15:20:36 -07:00
commit 1da177e4c3
17291 changed files with 6718755 additions and 0 deletions
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -0,0 +1,508 @@
+#
+# Traffic control configuration.
+# 
+choice
+	prompt "Packet scheduler clock source"
+	depends on NET_SCHED
+	default NET_SCH_CLK_JIFFIES
+	help
+	  Packet schedulers need a monotonic clock that increments at a static
+	  rate. The kernel provides several suitable interfaces, each with
+	  different properties:
+	  
+	  - high resolution (us or better)
+	  - fast to read (minimal locking, no i/o access)
+	  - synchronized on all processors
+	  - handles cpu clock frequency changes
+
+	  but nothing provides all of the above.
+
+config NET_SCH_CLK_JIFFIES
+	bool "Timer interrupt"
+	help
+	  Say Y here if you want to use the timer interrupt (jiffies) as clock
+	  source. This clock source is fast, synchronized on all processors and
+	  handles cpu clock frequency changes, but its resolution is too low
+	  for accurate shaping except at very low speed.
+
+config NET_SCH_CLK_GETTIMEOFDAY
+	bool "gettimeofday"
+	help
+	  Say Y here if you want to use gettimeofday as clock source. This clock
+	  source has high resolution, is synchronized on all processors and
+	  handles cpu clock frequency changes, but it is slow.
+
+	  Choose this if you need a high resolution clock source but can't use
+	  the CPU's cycle counter.
+
+config NET_SCH_CLK_CPU
+	bool "CPU cycle counter"
+	depends on X86_TSC || X86_64 || ALPHA || SPARC64 || PPC64 || IA64
+	help
+	  Say Y here if you want to use the CPU's cycle counter as clock source.
+	  This is a cheap and high resolution clock source, but on some
+	  architectures it is not synchronized on all processors and doesn't
+	  handle cpu clock frequency changes.
+
+	  The useable cycle counters are:
+
+	  	x86/x86_64	- Timestamp Counter
+		alpha		- Cycle Counter
+		sparc64		- %ticks register
+		ppc64		- Time base
+		ia64		- Interval Time Counter
+
+	  Choose this if your CPU's cycle counter is working properly.
+
+endchoice
+
+config NET_SCH_CBQ
+	tristate "CBQ packet scheduler"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Class-Based Queueing (CBQ) packet
+	  scheduling algorithm for some of your network devices.  This
+	  algorithm classifies the waiting packets into a tree-like hierarchy
+	  of classes; the leaves of this tree are in turn scheduled by
+	  separate algorithms (called "disciplines" in this context).
+
+	  See the top of <file:net/sched/sch_cbq.c> for references about the
+	  CBQ algorithm.
+
+	  CBQ is a commonly used scheduler, so if you're unsure, you should
+	  say Y here. Then say Y to all the queueing algorithms below that you
+	  want to use as CBQ disciplines.  Then say Y to "Packet classifier
+	  API" and say Y to all the classifiers you want to use; a classifier
+	  is a routine that allows you to sort your outgoing traffic into
+	  classes based on a certain criterion.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_cbq.
+
+config NET_SCH_HTB
+	tristate "HTB packet scheduler"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Hierarchical Token Buckets (HTB)
+	  packet scheduling algorithm for some of your network devices. See
+	  <http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and
+	  in-depth articles.
+
+	  HTB is very similar to the CBQ regarding its goals however is has 
+	  different properties and different algorithm.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_htb.
+
+config NET_SCH_HFSC
+	tristate "HFSC packet scheduler"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Hierarchical Fair Service Curve
+	  (HFSC) packet scheduling algorithm for some of your network devices.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_hfsc.
+
+#tristate '  H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ
+config NET_SCH_ATM
+	tristate "ATM pseudo-scheduler"
+	depends on NET_SCHED && ATM
+	---help---
+	  Say Y here if you want to use the ATM pseudo-scheduler.  This
+	  provides a framework for invoking classifiers (aka "filters"), which
+	  in turn select classes of this queuing discipline.  Each class maps
+	  the flow(s) it is handling to a given virtual circuit (see the top of
+	  <file:net/sched/sch_atm.c>).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_atm.
+
+config NET_SCH_PRIO
+	tristate "The simplest PRIO pseudoscheduler"
+	depends on NET_SCHED
+	help
+	  Say Y here if you want to use an n-band priority queue packet
+	  "scheduler" for some of your network devices or as a leaf discipline
+	  for the CBQ scheduling algorithm. If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_prio.
+
+config NET_SCH_RED
+	tristate "RED queue"
+	depends on NET_SCHED
+	help
+	  Say Y here if you want to use the Random Early Detection (RED)
+	  packet scheduling algorithm for some of your network devices (see
+	  the top of <file:net/sched/sch_red.c> for details and references
+	  about the algorithm).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_red.
+
+config NET_SCH_SFQ
+	tristate "SFQ queue"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
+	  packet scheduling algorithm for some of your network devices or as a
+	  leaf discipline for the CBQ scheduling algorithm (see the top of
+	  <file:net/sched/sch_sfq.c> for details and references about the SFQ
+	  algorithm).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_sfq.
+
+config NET_SCH_TEQL
+	tristate "TEQL queue"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the True Link Equalizer (TLE) packet
+	  scheduling algorithm for some of your network devices or as a leaf
+	  discipline for the CBQ scheduling algorithm. This queueing
+	  discipline allows the combination of several physical devices into
+	  one virtual device. (see the top of <file:net/sched/sch_teql.c> for
+	  details).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_teql.
+
+config NET_SCH_TBF
+	tristate "TBF queue"
+	depends on NET_SCHED
+	help
+	  Say Y here if you want to use the Simple Token Bucket Filter (TBF)
+	  packet scheduling algorithm for some of your network devices or as a
+	  leaf discipline for the CBQ scheduling algorithm (see the top of
+	  <file:net/sched/sch_tbf.c> for a description of the TBF algorithm).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_tbf.
+
+config NET_SCH_GRED
+	tristate "GRED queue"
+	depends on NET_SCHED
+	help
+	  Say Y here if you want to use the Generic Random Early Detection
+	  (RED) packet scheduling algorithm for some of your network devices
+	  (see the top of <file:net/sched/sch_red.c> for details and
+	  references about the algorithm).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_gred.
+
+config NET_SCH_DSMARK
+	tristate "Diffserv field marker"
+	depends on NET_SCHED
+	help
+	  Say Y if you want to schedule packets according to the
+	  Differentiated Services architecture proposed in RFC 2475.
+	  Technical information on this method, with pointers to associated
+	  RFCs, is available at <http://www.gta.ufrj.br/diffserv/>.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_dsmark.
+
+config NET_SCH_NETEM
+	tristate "Network emulator"
+	depends on NET_SCHED
+	help
+	  Say Y if you want to emulate network delay, loss, and packet
+	  re-ordering. This is often useful to simulate networks when
+	  testing applications or protocols.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_netem.
+
+	  If unsure, say N.
+
+config NET_SCH_INGRESS
+	tristate "Ingress Qdisc"
+	depends on NET_SCHED 
+	help
+	  If you say Y here, you will be able to police incoming bandwidth
+	  and drop packets when this bandwidth exceeds your desired rate.
+	  If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_ingress.
+
+config NET_QOS
+	bool "QoS support"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to include Quality Of Service scheduling
+	  features, which means that you will be able to request certain
+	  rate-of-flow limits for your network devices.
+
+	  This Quality of Service (QoS) support will enable you to use
+	  Differentiated Services (diffserv) and Resource Reservation Protocol
+	  (RSVP) on your Linux router if you also say Y to "Packet classifier
+	  API" and to some classifiers below. Documentation and software is at
+	  <http://diffserv.sourceforge.net/>.
+
+	  Note that the answer to this question won't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about QoS support.
+
+config NET_ESTIMATOR
+	bool "Rate estimator"
+	depends on NET_QOS
+	help
+	  In order for Quality of Service scheduling to work, the current
+	  rate-of-flow for a network device has to be estimated; if you say Y
+	  here, the kernel will do just that.
+
+config NET_CLS
+	bool "Packet classifier API"
+	depends on NET_SCHED
+	---help---
+	  The CBQ scheduling algorithm requires that network packets which are
+	  scheduled to be sent out over a network device be classified
+	  according to some criterion. If you say Y here, you will get a
+	  choice of several different packet classifiers with the following
+	  questions.
+
+	  This will enable you to use Differentiated Services (diffserv) and
+	  Resource Reservation Protocol (RSVP) on your Linux router.
+	  Documentation and software is at
+	  <http://diffserv.sourceforge.net/>.
+
+config NET_CLS_BASIC
+	tristate "Basic classifier"
+	depends on NET_CLS
+	---help---
+	  Say Y here if you want to be able to classify packets using
+	  only extended matches and actions.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_basic.
+
+config NET_CLS_TCINDEX
+	tristate "TC index classifier"
+	depends on NET_CLS
+	help
+	  If you say Y here, you will be able to classify outgoing packets
+	  according to the tc_index field of the skb. You will want this
+	  feature if you want to implement Differentiated Services using
+	  sch_dsmark. If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_tcindex.
+
+config NET_CLS_ROUTE4
+	tristate "Routing table based classifier"
+	depends on NET_CLS
+	select NET_CLS_ROUTE
+	help
+	  If you say Y here, you will be able to classify outgoing packets
+	  according to the route table entry they matched. If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_route.
+
+config NET_CLS_ROUTE
+	bool
+	default n
+
+config NET_CLS_FW
+	tristate "Firewall based classifier"
+	depends on NET_CLS
+	help
+	  If you say Y here, you will be able to classify outgoing packets
+	  according to firewall criteria you specified.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_fw.
+
+config NET_CLS_U32
+	tristate "U32 classifier"
+	depends on NET_CLS
+	help
+	  If you say Y here, you will be able to classify outgoing packets
+	  according to their destination address. If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_u32.
+
+config CLS_U32_PERF
+	bool "U32 classifier performance counters"
+	depends on NET_CLS_U32
+	help
+	  gathers stats that could be used to tune u32 classifier performance.
+	  Requires a new iproute2
+	  You MUST NOT turn this on if you dont have an update iproute2.
+
+config NET_CLS_IND
+	bool "classify input device (slows things u32/fw) "
+	depends on NET_CLS_U32 || NET_CLS_FW
+	help
+	  This option will be killed eventually when a 
+          metadata action appears because it slows things a little
+          Available only for u32 and fw classifiers.
+	  Requires a new iproute2
+	  You MUST NOT turn this on if you dont have an update iproute2.
+
+config CLS_U32_MARK
+	bool "Use nfmark as a key in U32 classifier"
+	depends on NET_CLS_U32 && NETFILTER
+	help
+	  This allows you to match mark in a u32 filter.
+	  Example:
+	  tc filter add dev eth0 protocol ip parent 1:0 prio 5 u32 \
+		match mark 0x0090 0xffff \
+		match ip dst 4.4.4.4 \
+		flowid 1:90
+	  You must use a new iproute2 to use this feature.
+
+config NET_CLS_RSVP
+	tristate "Special RSVP classifier"
+	depends on NET_CLS && NET_QOS
+	---help---
+	  The Resource Reservation Protocol (RSVP) permits end systems to
+	  request a minimum and maximum data flow rate for a connection; this
+	  is important for real time data such as streaming sound or video.
+
+	  Say Y here if you want to be able to classify outgoing packets based
+	  on their RSVP requests.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_rsvp.
+
+config NET_CLS_RSVP6
+	tristate "Special RSVP classifier for IPv6"
+	depends on NET_CLS && NET_QOS
+	---help---
+	  The Resource Reservation Protocol (RSVP) permits end systems to
+	  request a minimum and maximum data flow rate for a connection; this
+	  is important for real time data such as streaming sound or video.
+
+	  Say Y here if you want to be able to classify outgoing packets based
+	  on their RSVP requests and you are using the new Internet Protocol
+	  IPv6 as opposed to the older and more common IPv4.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_rsvp6.
+
+config NET_EMATCH
+	bool "Extended Matches"
+	depends on NET_CLS
+	---help---
+	  Say Y here if you want to use extended matches on top of classifiers
+	  and select the extended matches below.
+
+	  Extended matches are small classification helpers not worth writing
+	  a separate classifier.
+
+	  You must have a recent version of the iproute2 tools in order to use
+	  extended matches.
+
+config NET_EMATCH_STACK
+	int "Stack size"
+	depends on NET_EMATCH
+	default "32"
+	---help---
+	  Size of the local stack variable used while evaluating the tree of
+	  ematches. Limits the depth of the tree, i.e. the number of
+	  encapsulated precedences. Every level requires 4 bytes of addtional
+	  stack space.
+
+config NET_EMATCH_CMP
+	tristate "Simple packet data comparison"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets based on
+	  simple packet data comparisons for 8, 16, and 32bit values.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_cmp.
+
+config NET_EMATCH_NBYTE
+	tristate "Multi byte comparison"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets based on
+	  multiple byte comparisons mainly useful for IPv6 address comparisons.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_nbyte.
+
+config NET_EMATCH_U32
+	tristate "U32 hashing key"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets using
+	  the famous u32 key in combination with logic relations.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_u32.
+
+config NET_EMATCH_META
+	tristate "Metadata"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be ablt to classify packets based on
+	  metadata such as load average, netfilter attributes, socket
+	  attributes and routing decisions.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_meta.
+
+config NET_CLS_ACT
+	bool "Packet ACTION"
+	depends on EXPERIMENTAL && NET_CLS && NET_QOS
+	---help---
+	This option requires you have a new iproute2. It enables
+	tc extensions which can be used with tc classifiers.
+	  You MUST NOT turn this on if you dont have an update iproute2.
+
+config NET_ACT_POLICE
+	tristate "Policing Actions"
+        depends on NET_CLS_ACT 
+        ---help---
+        If you are using a newer iproute2 select this one, otherwise use one
+	below to select a policer.
+	  You MUST NOT turn this on if you dont have an update iproute2.
+
+config NET_ACT_GACT
+        tristate "generic Actions"
+        depends on NET_CLS_ACT
+        ---help---
+        You must have new iproute2 to use this feature.
+        This adds simple filtering actions like drop, accept etc.
+
+config GACT_PROB
+        bool "generic Actions probability"
+        depends on NET_ACT_GACT
+        ---help---
+        Allows generic actions to be randomly or deterministically used.
+
+config NET_ACT_MIRRED
+        tristate "Packet In/Egress redirecton/mirror Actions"
+        depends on NET_CLS_ACT
+        ---help---
+        requires new iproute2
+        This allows packets to be mirrored or redirected to netdevices
+
+config NET_ACT_IPT
+        tristate "iptables Actions"
+        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+        ---help---
+        requires new iproute2
+        This allows iptables targets to be used by tc filters
+
+config NET_ACT_PEDIT
+        tristate "Generic Packet Editor Actions"
+        depends on NET_CLS_ACT
+        ---help---
+        requires new iproute2
+        This allows for packets to be generically edited
+
+config NET_CLS_POLICE
+	bool "Traffic policing (needed for in/egress)"
+	depends on NET_CLS && NET_QOS && NET_CLS_ACT!=y
+	help
+	  Say Y to support traffic policing (bandwidth limits).  Needed for
+	  ingress and egress rate limiting.
+
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -0,0 +1,41 @@
+#
+# Makefile for the Linux Traffic Control Unit.
+#
+
+obj-y	:= sch_generic.o
+
+obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_fifo.o
+obj-$(CONFIG_NET_CLS)		+= cls_api.o
+obj-$(CONFIG_NET_CLS_ACT)       += act_api.o
+obj-$(CONFIG_NET_ACT_POLICE)	+= police.o
+obj-$(CONFIG_NET_CLS_POLICE)	+= police.o
+obj-$(CONFIG_NET_ACT_GACT)      += gact.o
+obj-$(CONFIG_NET_ACT_MIRRED)    += mirred.o
+obj-$(CONFIG_NET_ACT_IPT)       += ipt.o
+obj-$(CONFIG_NET_ACT_PEDIT)     += pedit.o
+obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
+obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
+obj-$(CONFIG_NET_SCH_HPFQ)	+= sch_hpfq.o
+obj-$(CONFIG_NET_SCH_HFSC)	+= sch_hfsc.o
+obj-$(CONFIG_NET_SCH_RED)	+= sch_red.o
+obj-$(CONFIG_NET_SCH_GRED)	+= sch_gred.o
+obj-$(CONFIG_NET_SCH_INGRESS)	+= sch_ingress.o 
+obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o
+obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
+obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
+obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
+obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
+obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
+obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
+obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
+obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
+obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
+obj-$(CONFIG_NET_CLS_RSVP)	+= cls_rsvp.o
+obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
+obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
+obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
+obj-$(CONFIG_NET_EMATCH)	+= ematch.o
+obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
+obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
+obj-$(CONFIG_NET_EMATCH_U32)	+= em_u32.o
+obj-$(CONFIG_NET_EMATCH_META)	+= em_meta.o
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -0,0 +1,894 @@
+/*
+ * net/sched/act_api.c	Packet action API.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Author:	Jamal Hadi Salim
+ *
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <net/sock.h>
+#include <net/sch_generic.h>
+#include <net/act_api.h>
+
+#if 1 /* control */
+#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
+#else
+#define DPRINTK(format, args...)
+#endif
+#if 0 /* data */
+#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args)
+#else
+#define D2PRINTK(format, args...)
+#endif
+
+static struct tc_action_ops *act_base = NULL;
+static DEFINE_RWLOCK(act_mod_lock);
+
+int tcf_register_action(struct tc_action_ops *act)
+{
+	struct tc_action_ops *a, **ap;
+
+	write_lock(&act_mod_lock);
+	for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) {
+		if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+			write_unlock(&act_mod_lock);
+			return -EEXIST;
+		}
+	}
+	act->next = NULL;
+	*ap = act;
+	write_unlock(&act_mod_lock);
+	return 0;
+}
+
+int tcf_unregister_action(struct tc_action_ops *act)
+{
+	struct tc_action_ops *a, **ap;
+	int err = -ENOENT;
+
+	write_lock(&act_mod_lock);
+	for (ap = &act_base; (a = *ap) != NULL; ap = &a->next)
+		if (a == act)
+			break;
+	if (a) {
+		*ap = a->next;
+		a->next = NULL;
+		err = 0;
+	}
+	write_unlock(&act_mod_lock);
+	return err;
+}
+
+/* lookup by name */
+static struct tc_action_ops *tc_lookup_action_n(char *kind)
+{
+	struct tc_action_ops *a = NULL;
+
+	if (kind) {
+		read_lock(&act_mod_lock);
+		for (a = act_base; a; a = a->next) {
+			if (strcmp(kind, a->kind) == 0) {
+				if (!try_module_get(a->owner)) {
+					read_unlock(&act_mod_lock);
+					return NULL;
+				}
+				break;
+			}
+		}
+		read_unlock(&act_mod_lock);
+	}
+	return a;
+}
+
+/* lookup by rtattr */
+static struct tc_action_ops *tc_lookup_action(struct rtattr *kind)
+{
+	struct tc_action_ops *a = NULL;
+
+	if (kind) {
+		read_lock(&act_mod_lock);
+		for (a = act_base; a; a = a->next) {
+			if (rtattr_strcmp(kind, a->kind) == 0) {
+				if (!try_module_get(a->owner)) {
+					read_unlock(&act_mod_lock);
+					return NULL;
+				}
+				break;
+			}
+		}
+		read_unlock(&act_mod_lock);
+	}
+	return a;
+}
+
+#if 0
+/* lookup by id */
+static struct tc_action_ops *tc_lookup_action_id(u32 type)
+{
+	struct tc_action_ops *a = NULL;
+
+	if (type) {
+		read_lock(&act_mod_lock);
+		for (a = act_base; a; a = a->next) {
+			if (a->type == type) {
+				if (!try_module_get(a->owner)) {
+					read_unlock(&act_mod_lock);
+					return NULL;
+				}
+				break;
+			}
+		}
+		read_unlock(&act_mod_lock);
+	}
+	return a;
+}
+#endif
+
+int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
+                    struct tcf_result *res)
+{
+	struct tc_action *a;
+	int ret = -1;
+
+	if (skb->tc_verd & TC_NCLS) {
+		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+		D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n",
+		         skb, skb->input_dev ? skb->input_dev->name : "xxx",
+		         skb->dev->name);
+		ret = TC_ACT_OK;
+		goto exec_done;
+	}
+	while ((a = act) != NULL) {
+repeat:
+		if (a->ops && a->ops->act) {
+			ret = a->ops->act(&skb, a);
+			if (TC_MUNGED & skb->tc_verd) {
+				/* copied already, allow trampling */
+				skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+				skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+			}
+			if (ret != TC_ACT_PIPE)
+				goto exec_done;
+			if (ret == TC_ACT_REPEAT)
+				goto repeat;	/* we need a ttl - JHS */
+		}
+		act = a->next;
+	}
+exec_done:
+	if (skb->tc_classid > 0) {
+		res->classid = skb->tc_classid;
+		res->class = 0;
+		skb->tc_classid = 0;
+	}
+	return ret;
+}
+
+void tcf_action_destroy(struct tc_action *act, int bind)
+{
+	struct tc_action *a;
+
+	for (a = act; a; a = act) {
+		if (a->ops && a->ops->cleanup) {
+			DPRINTK("tcf_action_destroy destroying %p next %p\n",
+			        a, a->next);
+			if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
+				module_put(a->ops->owner);
+			act = act->next;
+			kfree(a);
+		} else { /*FIXME: Remove later - catch insertion bugs*/
+			printk("tcf_action_destroy: BUG? destroying NULL ops\n");
+			act = act->next;
+			kfree(a);
+		}
+	}
+}
+
+int
+tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	int err = -EINVAL;
+
+	if (a->ops == NULL || a->ops->dump == NULL)
+		return err;
+	return a->ops->dump(skb, a, bind, ref);
+}
+
+int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	int err = -EINVAL;
+	unsigned char *b = skb->tail;
+	struct rtattr *r;
+
+	if (a->ops == NULL || a->ops->dump == NULL)
+		return err;
+
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+	if (tcf_action_copy_stats(skb, a, 0))
+		goto rtattr_failure;
+	r = (struct rtattr*) skb->tail;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
+		r->rta_len = skb->tail - (u8*)r;
+		return err;
+	}
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+int
+tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
+{
+	struct tc_action *a;
+	int err = -EINVAL;
+	unsigned char *b = skb->tail;
+	struct rtattr *r ;
+
+	while ((a = act) != NULL) {
+		r = (struct rtattr*) skb->tail;
+		act = a->next;
+		RTA_PUT(skb, a->order, 0, NULL);
+		err = tcf_action_dump_1(skb, a, bind, ref);
+		if (err < 0)
+			goto rtattr_failure;
+		r->rta_len = skb->tail - (u8*)r;
+	}
+
+	return 0;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -err;
+}
+
+struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
+                                    char *name, int ovr, int bind, int *err)
+{
+	struct tc_action *a;
+	struct tc_action_ops *a_o;
+	char act_name[IFNAMSIZ];
+	struct rtattr *tb[TCA_ACT_MAX+1];
+	struct rtattr *kind;
+
+	*err = -EINVAL;
+
+	if (name == NULL) {
+		if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+			goto err_out;
+		kind = tb[TCA_ACT_KIND-1];
+		if (kind == NULL)
+			goto err_out;
+		if (rtattr_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+			goto err_out;
+	} else {
+		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+			goto err_out;
+	}
+
+	a_o = tc_lookup_action_n(act_name);
+	if (a_o == NULL) {
+#ifdef CONFIG_KMOD
+		rtnl_unlock();
+		request_module(act_name);
+		rtnl_lock();
+
+		a_o = tc_lookup_action_n(act_name);
+
+		/* We dropped the RTNL semaphore in order to
+		 * perform the module load.  So, even if we
+		 * succeeded in loading the module we have to
+		 * tell the caller to replay the request.  We
+		 * indicate this using -EAGAIN.
+		 */
+		if (a_o != NULL) {
+			*err = -EAGAIN;
+			goto err_mod;
+		}
+#endif
+		goto err_out;
+	}
+
+	*err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_KERNEL);
+	if (a == NULL)
+		goto err_mod;
+	memset(a, 0, sizeof(*a));
+
+	/* backward compatibility for policer */
+	if (name == NULL)
+		*err = a_o->init(tb[TCA_ACT_OPTIONS-1], est, a, ovr, bind);
+	else
+		*err = a_o->init(rta, est, a, ovr, bind);
+	if (*err < 0)
+		goto err_free;
+
+	/* module count goes up only when brand new policy is created
+	   if it exists and is only bound to in a_o->init() then
+	   ACT_P_CREATED is not returned (a zero is).
+	*/
+	if (*err != ACT_P_CREATED)
+		module_put(a_o->owner);
+	a->ops = a_o;
+	DPRINTK("tcf_action_init_1: successfull %s\n", act_name);
+
+	*err = 0;
+	return a;
+
+err_free:
+	kfree(a);
+err_mod:
+	module_put(a_o->owner);
+err_out:
+	return NULL;
+}
+
+struct tc_action *tcf_action_init(struct rtattr *rta, struct rtattr *est,
+                                  char *name, int ovr, int bind, int *err)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct tc_action *head = NULL, *act, *act_prev = NULL;
+	int i;
+
+	if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0) {
+		*err = -EINVAL;
+		return head;
+	}
+
+	for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) {
+		act = tcf_action_init_1(tb[i], est, name, ovr, bind, err);
+		if (act == NULL)
+			goto err;
+		act->order = i+1;
+
+		if (head == NULL)
+			head = act;
+		else
+			act_prev->next = act;
+		act_prev = act;
+	}
+	return head;
+
+err:
+	if (head != NULL)
+		tcf_action_destroy(head, bind);
+	return NULL;
+}
+
+int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
+			  int compat_mode)
+{
+	int err = 0;
+	struct gnet_dump d;
+	struct tcf_act_hdr *h = a->priv;
+	
+	if (h == NULL)
+		goto errout;
+
+	/* compat_mode being true specifies a call that is supposed
+	 * to add additional backward compatiblity statistic TLVs.
+	 */
+	if (compat_mode) {
+		if (a->type == TCA_OLD_COMPAT)
+			err = gnet_stats_start_copy_compat(skb, 0,
+				TCA_STATS, TCA_XSTATS, h->stats_lock, &d);
+		else
+			return 0;
+	} else
+		err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
+			h->stats_lock, &d);
+
+	if (err < 0)
+		goto errout;
+
+	if (a->ops != NULL && a->ops->get_stats != NULL)
+		if (a->ops->get_stats(skb, a) < 0)
+			goto errout;
+
+	if (gnet_stats_copy_basic(&d, &h->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+	    gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 ||
+#endif
+	    gnet_stats_copy_queue(&d, &h->qstats) < 0)
+		goto errout;
+
+	if (gnet_stats_finish_copy(&d) < 0)
+		goto errout;
+
+	return 0;
+
+errout:
+	return -1;
+}
+
+static int
+tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
+             unsigned flags, int event, int bind, int ref)
+{
+	struct tcamsg *t;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+	struct rtattr *x;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t));
+	nlh->nlmsg_flags = flags;
+	t = NLMSG_DATA(nlh);
+	t->tca_family = AF_UNSPEC;
+	
+	x = (struct rtattr*) skb->tail;
+	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+
+	if (tcf_action_dump(skb, a, bind, ref) < 0)
+		goto rtattr_failure;
+
+	x->rta_len = skb->tail - (u8*)x;
+	
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+rtattr_failure:
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int
+act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
+{
+	struct sk_buff *skb;
+	int err = 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+	if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+	if (err > 0)
+		err = 0;
+	return err;
+}
+
+static struct tc_action *
+tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err)
+{
+	struct rtattr *tb[TCA_ACT_MAX+1];
+	struct tc_action *a;
+	int index;
+
+	*err = -EINVAL;
+	if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+		return NULL;
+
+	if (tb[TCA_ACT_INDEX - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_ACT_INDEX - 1]) < sizeof(index))
+		return NULL;
+	index = *(int *)RTA_DATA(tb[TCA_ACT_INDEX - 1]);
+
+	*err = -ENOMEM;
+	a = kmalloc(sizeof(struct tc_action), GFP_KERNEL);
+	if (a == NULL)
+		return NULL;
+	memset(a, 0, sizeof(struct tc_action));
+
+	*err = -EINVAL;
+	a->ops = tc_lookup_action(tb[TCA_ACT_KIND - 1]);
+	if (a->ops == NULL)
+		goto err_free;
+	if (a->ops->lookup == NULL)
+		goto err_mod;
+	*err = -ENOENT;
+	if (a->ops->lookup(a, index) == 0)
+		goto err_mod;
+
+	module_put(a->ops->owner);
+	*err = 0;
+	return a;
+err_mod:
+	module_put(a->ops->owner);
+err_free:
+	kfree(a);
+	return NULL;
+}
+
+static void cleanup_a(struct tc_action *act)
+{
+	struct tc_action *a;
+
+	for (a = act; a; a = act) {
+		act = a->next;
+		kfree(a);
+	}
+}
+
+static struct tc_action *create_a(int i)
+{
+	struct tc_action *act;
+
+	act = kmalloc(sizeof(*act), GFP_KERNEL);
+	if (act == NULL) {
+		printk("create_a: failed to alloc!\n");
+		return NULL;
+	}
+	memset(act, 0, sizeof(*act));
+	act->order = i;
+	return act;
+}
+
+static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
+{
+	struct sk_buff *skb;
+	unsigned char *b;
+	struct nlmsghdr *nlh;
+	struct tcamsg *t;
+	struct netlink_callback dcb;
+	struct rtattr *x;
+	struct rtattr *tb[TCA_ACT_MAX+1];
+	struct rtattr *kind;
+	struct tc_action *a = create_a(0);
+	int err = -EINVAL;
+
+	if (a == NULL) {
+		printk("tca_action_flush: couldnt create tc_action\n");
+		return err;
+	}
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb) {
+		printk("tca_action_flush: failed skb alloc\n");
+		kfree(a);
+		return -ENOBUFS;
+	}
+
+	b = (unsigned char *)skb->tail;
+
+	if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+		goto err_out;
+
+	kind = tb[TCA_ACT_KIND-1];
+	a->ops = tc_lookup_action(kind);
+	if (a->ops == NULL)
+		goto err_out;
+
+	nlh = NLMSG_PUT(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t));
+	t = NLMSG_DATA(nlh);
+	t->tca_family = AF_UNSPEC;
+
+	x = (struct rtattr *) skb->tail;
+	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+
+	err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
+	if (err < 0)
+		goto rtattr_failure;
+
+	x->rta_len = skb->tail - (u8 *) x;
+
+	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_flags |= NLM_F_ROOT;
+	module_put(a->ops->owner);
+	kfree(a);
+	err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	if (err > 0)
+		return 0;
+
+	return err;
+
+rtattr_failure:
+	module_put(a->ops->owner);
+nlmsg_failure:
+err_out:
+	kfree_skb(skb);
+	kfree(a);
+	return err;
+}
+
+static int
+tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
+{
+	int i, ret = 0;
+	struct rtattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct tc_action *head = NULL, *act, *act_prev = NULL;
+
+	if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0)
+		return -EINVAL;
+
+	if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
+		if (tb[0] != NULL && tb[1] == NULL)
+			return tca_action_flush(tb[0], n, pid);
+	}
+
+	for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) {
+		act = tcf_action_get_1(tb[i], n, pid, &ret);
+		if (act == NULL)
+			goto err;
+		act->order = i+1;
+
+		if (head == NULL)
+			head = act;
+		else
+			act_prev->next = act;
+		act_prev = act;
+	}
+
+	if (event == RTM_GETACTION)
+		ret = act_get_notify(pid, n, head, event);
+	else { /* delete */
+		struct sk_buff *skb;
+
+		skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+		if (!skb) {
+			ret = -ENOBUFS;
+			goto err;
+		}
+
+		if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event,
+		                 0, 1) <= 0) {
+			kfree_skb(skb);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		/* now do the delete */
+		tcf_action_destroy(head, 0);
+		ret = rtnetlink_send(skb, pid, RTMGRP_TC,
+		                     n->nlmsg_flags&NLM_F_ECHO);
+		if (ret > 0)
+			return 0;
+		return ret;
+	}
+err:
+	cleanup_a(head);
+	return ret;
+}
+
+static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
+                          unsigned flags)
+{
+	struct tcamsg *t;
+	struct nlmsghdr *nlh;
+	struct sk_buff *skb;
+	struct rtattr *x;
+	unsigned char *b;
+	int err = 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	b = (unsigned char *)skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t));
+	nlh->nlmsg_flags = flags;
+	t = NLMSG_DATA(nlh);
+	t->tca_family = AF_UNSPEC;
+	
+	x = (struct rtattr*) skb->tail;
+	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+
+	if (tcf_action_dump(skb, a, 0, 0) < 0)
+		goto rtattr_failure;
+
+	x->rta_len = skb->tail - (u8*)x;
+	
+	nlh->nlmsg_len = skb->tail - b;
+	NETLINK_CB(skb).dst_groups = RTMGRP_TC;
+	
+	err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
+
+rtattr_failure:
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+	
+static int
+tcf_action_add(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int ovr)
+{
+	int ret = 0;
+	struct tc_action *act;
+	struct tc_action *a;
+	u32 seq = n->nlmsg_seq;
+
+	act = tcf_action_init(rta, NULL, NULL, ovr, 0, &ret);
+	if (act == NULL)
+		goto done;
+
+	/* dump then free all the actions after update; inserted policy
+	 * stays intact
+	 * */
+	ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
+	for (a = act; a; a = act) {
+		act = a->next;
+		kfree(a);
+	}
+done:
+	return ret;
+}
+
+static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct rtattr **tca = arg;
+	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+	int ret = 0, ovr = 0;
+
+	if (tca[TCA_ACT_TAB-1] == NULL) {
+		printk("tc_ctl_action: received NO action attribs\n");
+		return -EINVAL;
+	}
+
+	/* n->nlmsg_flags&NLM_F_CREATE
+	 * */
+	switch (n->nlmsg_type) {
+	case RTM_NEWACTION:
+		/* we are going to assume all other flags
+		 * imply create only if it doesnt exist
+		 * Note that CREATE | EXCL implies that
+		 * but since we want avoid ambiguity (eg when flags
+		 * is zero) then just set this
+		 */
+		if (n->nlmsg_flags&NLM_F_REPLACE)
+			ovr = 1;
+replay:
+		ret = tcf_action_add(tca[TCA_ACT_TAB-1], n, pid, ovr);
+		if (ret == -EAGAIN)
+			goto replay;
+		break;
+	case RTM_DELACTION:
+		ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_DELACTION);
+		break;
+	case RTM_GETACTION:
+		ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_GETACTION);
+		break;
+	default:
+		BUG();
+	}
+
+	return ret;
+}
+
+static char *
+find_dump_kind(struct nlmsghdr *n)
+{
+	struct rtattr *tb1, *tb2[TCA_ACT_MAX+1];
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	struct rtattr *rta[TCAA_MAX + 1];
+	struct rtattr *kind;
+	int min_len = NLMSG_LENGTH(sizeof(struct tcamsg));
+	int attrlen = n->nlmsg_len - NLMSG_ALIGN(min_len);
+	struct rtattr *attr = (void *) n + NLMSG_ALIGN(min_len);
+
+	if (rtattr_parse(rta, TCAA_MAX, attr, attrlen) < 0)
+		return NULL;
+	tb1 = rta[TCA_ACT_TAB - 1];
+	if (tb1 == NULL)
+		return NULL;
+
+	if (rtattr_parse(tb, TCA_ACT_MAX_PRIO, RTA_DATA(tb1),
+	                 NLMSG_ALIGN(RTA_PAYLOAD(tb1))) < 0)
+		return NULL;
+	if (tb[0] == NULL)
+		return NULL;
+
+	if (rtattr_parse(tb2, TCA_ACT_MAX, RTA_DATA(tb[0]),
+	                 RTA_PAYLOAD(tb[0])) < 0)
+		return NULL;
+	kind = tb2[TCA_ACT_KIND-1];
+
+	return (char *) RTA_DATA(kind);
+}
+
+static int
+tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+	struct rtattr *x;
+	struct tc_action_ops *a_o;
+	struct tc_action a;
+	int ret = 0;
+	struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
+	char *kind = find_dump_kind(cb->nlh);
+
+	if (kind == NULL) {
+		printk("tc_dump_action: action bad kind\n");
+		return 0;
+	}
+
+	a_o = tc_lookup_action_n(kind);
+	if (a_o == NULL) {
+		printk("failed to find %s\n", kind);
+		return 0;
+	}
+
+	memset(&a, 0, sizeof(struct tc_action));
+	a.ops = a_o;
+
+	if (a_o->walk == NULL) {
+		printk("tc_dump_action: %s !capable of dumping table\n", kind);
+		goto rtattr_failure;
+	}
+
+	nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	                cb->nlh->nlmsg_type, sizeof(*t));
+	t = NLMSG_DATA(nlh);
+	t->tca_family = AF_UNSPEC;
+
+	x = (struct rtattr *) skb->tail;
+	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+
+	ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
+	if (ret < 0)
+		goto rtattr_failure;
+
+	if (ret > 0) {
+		x->rta_len = skb->tail - (u8 *) x;
+		ret = skb->len;
+	} else
+		skb_trim(skb, (u8*)x - skb->data);
+
+	nlh->nlmsg_len = skb->tail - b;
+	if (NETLINK_CB(cb->skb).pid && ret)
+		nlh->nlmsg_flags |= NLM_F_MULTI;
+	module_put(a_o->owner);
+	return skb->len;
+
+rtattr_failure:
+nlmsg_failure:
+	module_put(a_o->owner);
+	skb_trim(skb, b - skb->data);
+	return skb->len;
+}
+
+static int __init tc_action_init(void)
+{
+	struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+
+	if (link_p) {
+		link_p[RTM_NEWACTION-RTM_BASE].doit = tc_ctl_action;
+		link_p[RTM_DELACTION-RTM_BASE].doit = tc_ctl_action;
+		link_p[RTM_GETACTION-RTM_BASE].doit = tc_ctl_action;
+		link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
+	}
+
+	printk("TC classifier action (bugs to netdev@oss.sgi.com cc "
+	       "hadi@cyberus.ca)\n");
+	return 0;
+}
+
+subsys_initcall(tc_action_init);
+
+EXPORT_SYMBOL(tcf_register_action);
+EXPORT_SYMBOL(tcf_unregister_action);
+EXPORT_SYMBOL(tcf_action_exec);
+EXPORT_SYMBOL(tcf_action_dump_1);
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -0,0 +1,642 @@
+/*
+ * net/sched/cls_api.c	Packet classifier API.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *
+ * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+
+#if 0 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+/* The list of all installed classifier types */
+
+static struct tcf_proto_ops *tcf_proto_base;
+
+/* Protects list of registered TC modules. It is pure SMP lock. */
+static DEFINE_RWLOCK(cls_mod_lock);
+
+/* Find classifier type by string name */
+
+static struct tcf_proto_ops * tcf_proto_lookup_ops(struct rtattr *kind)
+{
+	struct tcf_proto_ops *t = NULL;
+
+	if (kind) {
+		read_lock(&cls_mod_lock);
+		for (t = tcf_proto_base; t; t = t->next) {
+			if (rtattr_strcmp(kind, t->kind) == 0) {
+				if (!try_module_get(t->owner))
+					t = NULL;
+				break;
+			}
+		}
+		read_unlock(&cls_mod_lock);
+	}
+	return t;
+}
+
+/* Register(unregister) new classifier type */
+
+int register_tcf_proto_ops(struct tcf_proto_ops *ops)
+{
+	struct tcf_proto_ops *t, **tp;
+	int rc = -EEXIST;
+
+	write_lock(&cls_mod_lock);
+	for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
+		if (!strcmp(ops->kind, t->kind))
+			goto out;
+
+	ops->next = NULL;
+	*tp = ops;
+	rc = 0;
+out:
+	write_unlock(&cls_mod_lock);
+	return rc;
+}
+
+int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
+{
+	struct tcf_proto_ops *t, **tp;
+	int rc = -ENOENT;
+
+	write_lock(&cls_mod_lock);
+	for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next)
+		if (t == ops)
+			break;
+
+	if (!t)
+		goto out;
+	*tp = t->next;
+	rc = 0;
+out:
+	write_unlock(&cls_mod_lock);
+	return rc;
+}
+
+static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			  struct tcf_proto *tp, unsigned long fh, int event);
+
+
+/* Select new prio value from the range, managed by kernel. */
+
+static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp)
+{
+	u32 first = TC_H_MAKE(0xC0000000U,0U);
+
+	if (tp)
+		first = tp->prio-1;
+
+	return first;
+}
+
+/* Add/change/delete/get a filter node */
+
+static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct rtattr **tca;
+	struct tcmsg *t;
+	u32 protocol;
+	u32 prio;
+	u32 nprio;
+	u32 parent;
+	struct net_device *dev;
+	struct Qdisc  *q;
+	struct tcf_proto **back, **chain;
+	struct tcf_proto *tp;
+	struct tcf_proto_ops *tp_ops;
+	struct Qdisc_class_ops *cops;
+	unsigned long cl;
+	unsigned long fh;
+	int err;
+
+replay:
+	tca = arg;
+	t = NLMSG_DATA(n);
+	protocol = TC_H_MIN(t->tcm_info);
+	prio = TC_H_MAJ(t->tcm_info);
+	nprio = prio;
+	parent = t->tcm_parent;
+	cl = 0;
+
+	if (prio == 0) {
+		/* If no priority is given, user wants we allocated it. */
+		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+			return -ENOENT;
+		prio = TC_H_MAKE(0x80000000U,0U);
+	}
+
+	/* Find head of filter chain. */
+
+	/* Find link */
+	if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL)
+		return -ENODEV;
+
+	/* Find qdisc */
+	if (!parent) {
+		q = dev->qdisc_sleeping;
+		parent = q->handle;
+	} else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL)
+		return -EINVAL;
+
+	/* Is it classful? */
+	if ((cops = q->ops->cl_ops) == NULL)
+		return -EINVAL;
+
+	/* Do we search for filter, attached to class? */
+	if (TC_H_MIN(parent)) {
+		cl = cops->get(q, parent);
+		if (cl == 0)
+			return -ENOENT;
+	}
+
+	/* And the last stroke */
+	chain = cops->tcf_chain(q, cl);
+	err = -EINVAL;
+	if (chain == NULL)
+		goto errout;
+
+	/* Check the chain for existence of proto-tcf with this priority */
+	for (back = chain; (tp=*back) != NULL; back = &tp->next) {
+		if (tp->prio >= prio) {
+			if (tp->prio == prio) {
+				if (!nprio || (tp->protocol != protocol && protocol))
+					goto errout;
+			} else
+				tp = NULL;
+			break;
+		}
+	}
+
+	if (tp == NULL) {
+		/* Proto-tcf does not exist, create new one */
+
+		if (tca[TCA_KIND-1] == NULL || !protocol)
+			goto errout;
+
+		err = -ENOENT;
+		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+			goto errout;
+
+
+		/* Create new proto tcf */
+
+		err = -ENOBUFS;
+		if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL)
+			goto errout;
+		err = -EINVAL;
+		tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]);
+		if (tp_ops == NULL) {
+#ifdef CONFIG_KMOD
+			struct rtattr *kind = tca[TCA_KIND-1];
+			char name[IFNAMSIZ];
+
+			if (kind != NULL &&
+			    rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+				rtnl_unlock();
+				request_module("cls_%s", name);
+				rtnl_lock();
+				tp_ops = tcf_proto_lookup_ops(kind);
+				/* We dropped the RTNL semaphore in order to
+				 * perform the module load.  So, even if we
+				 * succeeded in loading the module we have to
+				 * replay the request.  We indicate this using
+				 * -EAGAIN.
+				 */
+				if (tp_ops != NULL) {
+					module_put(tp_ops->owner);
+					err = -EAGAIN;
+				}
+			}
+#endif
+			kfree(tp);
+			goto errout;
+		}
+		memset(tp, 0, sizeof(*tp));
+		tp->ops = tp_ops;
+		tp->protocol = protocol;
+		tp->prio = nprio ? : tcf_auto_prio(*back);
+		tp->q = q;
+		tp->classify = tp_ops->classify;
+		tp->classid = parent;
+		if ((err = tp_ops->init(tp)) != 0) {
+			module_put(tp_ops->owner);
+			kfree(tp);
+			goto errout;
+		}
+
+		qdisc_lock_tree(dev);
+		tp->next = *back;
+		*back = tp;
+		qdisc_unlock_tree(dev);
+
+	} else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind))
+		goto errout;
+
+	fh = tp->ops->get(tp, t->tcm_handle);
+
+	if (fh == 0) {
+		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
+			qdisc_lock_tree(dev);
+			*back = tp->next;
+			qdisc_unlock_tree(dev);
+
+			tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
+			tcf_destroy(tp);
+			err = 0;
+			goto errout;
+		}
+
+		err = -ENOENT;
+		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+			goto errout;
+	} else {
+		switch (n->nlmsg_type) {
+		case RTM_NEWTFILTER:	
+			err = -EEXIST;
+			if (n->nlmsg_flags&NLM_F_EXCL)
+				goto errout;
+			break;
+		case RTM_DELTFILTER:
+			err = tp->ops->delete(tp, fh);
+			if (err == 0)
+				tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
+			goto errout;
+		case RTM_GETTFILTER:
+			err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
+			goto errout;
+		default:
+			err = -EINVAL;
+			goto errout;
+		}
+	}
+
+	err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
+	if (err == 0)
+		tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
+
+errout:
+	if (cl)
+		cops->put(q, cl);
+	if (err == -EAGAIN)
+		/* Replay the request. */
+		goto replay;
+	return err;
+}
+
+static int
+tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
+	      u32 pid, u32 seq, unsigned flags, int event)
+{
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
+	nlh->nlmsg_flags = flags;
+	tcm = NLMSG_DATA(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = tp->q->dev->ifindex;
+	tcm->tcm_parent = tp->classid;
+	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind);
+	tcm->tcm_handle = fh;
+	if (RTM_DELTFILTER != event) {
+		tcm->tcm_handle = 0;
+		if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
+			goto rtattr_failure;
+	}
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			  struct tcf_proto *tp, unsigned long fh, int event)
+{
+	struct sk_buff *skb;
+	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+}
+
+struct tcf_dump_args
+{
+	struct tcf_walker w;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+};
+
+static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walker *arg)
+{
+	struct tcf_dump_args *a = (void*)arg;
+
+	return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
+			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
+}
+
+static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int t;
+	int s_t;
+	struct net_device *dev;
+	struct Qdisc *q;
+	struct tcf_proto *tp, **chain;
+	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+	unsigned long cl = 0;
+	struct Qdisc_class_ops *cops;
+	struct tcf_dump_args arg;
+
+	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+		return skb->len;
+	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return skb->len;
+
+	read_lock_bh(&qdisc_tree_lock);
+	if (!tcm->tcm_parent)
+		q = dev->qdisc_sleeping;
+	else
+		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+	if (!q)
+		goto out;
+	if ((cops = q->ops->cl_ops) == NULL)
+		goto errout;
+	if (TC_H_MIN(tcm->tcm_parent)) {
+		cl = cops->get(q, tcm->tcm_parent);
+		if (cl == 0)
+			goto errout;
+	}
+	chain = cops->tcf_chain(q, cl);
+	if (chain == NULL)
+		goto errout;
+
+	s_t = cb->args[0];
+
+	for (tp=*chain, t=0; tp; tp = tp->next, t++) {
+		if (t < s_t) continue;
+		if (TC_H_MAJ(tcm->tcm_info) &&
+		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
+			continue;
+		if (TC_H_MIN(tcm->tcm_info) &&
+		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
+			continue;
+		if (t > s_t)
+			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+		if (cb->args[1] == 0) {
+			if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) {
+				break;
+			}
+			cb->args[1] = 1;
+		}
+		if (tp->ops->walk == NULL)
+			continue;
+		arg.w.fn = tcf_node_dump;
+		arg.skb = skb;
+		arg.cb = cb;
+		arg.w.stop = 0;
+		arg.w.skip = cb->args[1]-1;
+		arg.w.count = 0;
+		tp->ops->walk(tp, &arg.w);
+		cb->args[1] = arg.w.count+1;
+		if (arg.w.stop)
+			break;
+	}
+
+	cb->args[0] = t;
+
+errout:
+	if (cl)
+		cops->put(q, cl);
+out:
+	read_unlock_bh(&qdisc_tree_lock);
+	dev_put(dev);
+	return skb->len;
+}
+
+void
+tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (exts->action) {
+		tcf_action_destroy(exts->action, TCA_ACT_UNBIND);
+		exts->action = NULL;
+	}
+#elif defined CONFIG_NET_CLS_POLICE
+	if (exts->police) {
+		tcf_police_release(exts->police, TCA_ACT_UNBIND);
+		exts->police = NULL;
+	}
+#endif
+}
+
+
+int
+tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
+	          struct rtattr *rate_tlv, struct tcf_exts *exts,
+	          struct tcf_ext_map *map)
+{
+	memset(exts, 0, sizeof(*exts));
+	
+#ifdef CONFIG_NET_CLS_ACT
+	{
+		int err;
+		struct tc_action *act;
+
+		if (map->police && tb[map->police-1]) {
+			act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police",
+				TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
+			if (act == NULL)
+				return err;
+
+			act->type = TCA_OLD_COMPAT;
+			exts->action = act;
+		} else if (map->action && tb[map->action-1]) {
+			act = tcf_action_init(tb[map->action-1], rate_tlv, NULL,
+				TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
+			if (act == NULL)
+				return err;
+
+			exts->action = act;
+		}
+	}
+#elif defined CONFIG_NET_CLS_POLICE
+	if (map->police && tb[map->police-1]) {
+		struct tcf_police *p;
+		
+		p = tcf_police_locate(tb[map->police-1], rate_tlv);
+		if (p == NULL)
+			return -EINVAL;
+
+		exts->police = p;
+	} else if (map->action && tb[map->action-1])
+		return -EOPNOTSUPP;
+#else
+	if ((map->action && tb[map->action-1]) ||
+	    (map->police && tb[map->police-1]))
+		return -EOPNOTSUPP;
+#endif
+
+	return 0;
+}
+
+void
+tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
+	        struct tcf_exts *src)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (src->action) {
+		struct tc_action *act;
+		tcf_tree_lock(tp);
+		act = xchg(&dst->action, src->action);
+		tcf_tree_unlock(tp);
+		if (act)
+			tcf_action_destroy(act, TCA_ACT_UNBIND);
+	}
+#elif defined CONFIG_NET_CLS_POLICE
+	if (src->police) {
+		struct tcf_police *p;
+		tcf_tree_lock(tp);
+		p = xchg(&dst->police, src->police);
+		tcf_tree_unlock(tp);
+		if (p)
+			tcf_police_release(p, TCA_ACT_UNBIND);
+	}
+#endif
+}
+
+int
+tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
+	      struct tcf_ext_map *map)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (map->action && exts->action) {
+		/*
+		 * again for backward compatible mode - we want
+		 * to work with both old and new modes of entering
+		 * tc data even if iproute2  was newer - jhs
+		 */
+		struct rtattr * p_rta = (struct rtattr*) skb->tail;
+
+		if (exts->action->type != TCA_OLD_COMPAT) {
+			RTA_PUT(skb, map->action, 0, NULL);
+			if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
+				goto rtattr_failure;
+			p_rta->rta_len = skb->tail - (u8*)p_rta;
+		} else if (map->police) {
+			RTA_PUT(skb, map->police, 0, NULL);
+			if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
+				goto rtattr_failure;
+			p_rta->rta_len = skb->tail - (u8*)p_rta;
+		}
+	}
+#elif defined CONFIG_NET_CLS_POLICE
+	if (map->police && exts->police) {
+		struct rtattr * p_rta = (struct rtattr*) skb->tail;
+
+		RTA_PUT(skb, map->police, 0, NULL);
+
+		if (tcf_police_dump(skb, exts->police) < 0)
+			goto rtattr_failure;
+
+		p_rta->rta_len = skb->tail - (u8*)p_rta;
+	}
+#endif
+	return 0;
+rtattr_failure: __attribute__ ((unused))
+	return -1;
+}
+
+int
+tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
+	            struct tcf_ext_map *map)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (exts->action)
+		if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
+			goto rtattr_failure;
+#elif defined CONFIG_NET_CLS_POLICE
+	if (exts->police)
+		if (tcf_police_dump_stats(skb, exts->police) < 0)
+			goto rtattr_failure;
+#endif
+	return 0;
+rtattr_failure: __attribute__ ((unused))
+	return -1;
+}
+
+static int __init tc_filter_init(void)
+{
+	struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+
+	/* Setup rtnetlink links. It is made here to avoid
+	   exporting large number of public symbols.
+	 */
+
+	if (link_p) {
+		link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
+		link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
+		link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
+		link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter;
+	}
+	return 0;
+}
+
+subsys_initcall(tc_filter_init);
+
+EXPORT_SYMBOL(register_tcf_proto_ops);
+EXPORT_SYMBOL(unregister_tcf_proto_ops);
+EXPORT_SYMBOL(tcf_exts_validate);
+EXPORT_SYMBOL(tcf_exts_destroy);
+EXPORT_SYMBOL(tcf_exts_change);
+EXPORT_SYMBOL(tcf_exts_dump);
+EXPORT_SYMBOL(tcf_exts_dump_stats);
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -0,0 +1,303 @@
+/*
+ * net/sched/cls_basic.c	Basic Packet Classifier.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct basic_head
+{
+	u32			hgenerator;
+	struct list_head	flist;
+};
+
+struct basic_filter
+{
+	u32			handle;
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+	struct tcf_result	res;
+	struct list_head	link;
+};
+
+static struct tcf_ext_map basic_ext_map = {
+	.action = TCA_BASIC_ACT,
+	.police = TCA_BASIC_POLICE
+};
+
+static int basic_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	int r;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+			continue;
+		*res = f->res;
+		r = tcf_exts_exec(skb, &f->exts, res);
+		if (r < 0)
+			continue;
+		return r;
+	}
+	return -1;
+}
+
+static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
+{
+	unsigned long l = 0UL;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	if (head == NULL)
+		return 0UL;
+
+	list_for_each_entry(f, &head->flist, link)
+		if (f->handle == handle)
+			l = (unsigned long) f;
+
+	return l;
+}
+
+static void basic_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int basic_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static inline void basic_delete_filter(struct tcf_proto *tp,
+				       struct basic_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	tcf_em_tree_destroy(tp, &f->ematches);
+	kfree(f);
+}
+
+static void basic_destroy(struct tcf_proto *tp)
+{
+	struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL);
+	struct basic_filter *f, *n;
+	
+	list_for_each_entry_safe(f, n, &head->flist, link) {
+		list_del(&f->link);
+		basic_delete_filter(tp, f);
+	}
+}
+
+static int basic_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *t, *f = (struct basic_filter *) arg;
+
+	list_for_each_entry(t, &head->flist, link)
+		if (t == f) {
+			tcf_tree_lock(tp);
+			list_del(&t->link);
+			tcf_tree_unlock(tp);
+			basic_delete_filter(tp, t);
+			return 0;
+		}
+
+	return -ENOENT;
+}
+
+static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+				  unsigned long base, struct rtattr **tb,
+				  struct rtattr *est)
+{
+	int err = -EINVAL;
+	struct tcf_exts e;
+	struct tcf_ematch_tree t;
+
+	if (tb[TCA_BASIC_CLASSID-1])
+		if (RTA_PAYLOAD(tb[TCA_BASIC_CLASSID-1]) < sizeof(u32))
+			return err;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES-1], &t);
+	if (err < 0)
+		goto errout;
+
+	if (tb[TCA_BASIC_CLASSID-1]) {
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_BASIC_CLASSID-1]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	tcf_exts_change(tp, &f->exts, &e);
+	tcf_em_tree_change(tp, &f->ematches, &t);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+		        struct rtattr **tca, unsigned long *arg)
+{
+	int err = -EINVAL;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct rtattr *tb[TCA_BASIC_MAX];
+	struct basic_filter *f = (struct basic_filter *) *arg;
+
+	if (tca[TCA_OPTIONS-1] == NULL)
+		return -EINVAL;
+
+	if (rtattr_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS-1]) < 0)
+		return -EINVAL;
+
+	if (f != NULL) {
+		if (handle && f->handle != handle)
+			return -EINVAL;
+		return basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]);
+	}
+
+	err = -ENOBUFS;
+	if (head == NULL) {
+		head = kmalloc(sizeof(*head), GFP_KERNEL);
+		if (head == NULL)
+			goto errout;
+
+		memset(head, 0, sizeof(*head));
+		INIT_LIST_HEAD(&head->flist);
+		tp->root = head;
+	}
+
+	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (f == NULL)
+		goto errout;
+	memset(f, 0, sizeof(*f));
+
+	err = -EINVAL;
+	if (handle)
+		f->handle = handle;
+	else {
+		int i = 0x80000000;
+		do {
+			if (++head->hgenerator == 0x7FFFFFFF)
+				head->hgenerator = 1;
+		} while (--i > 0 && basic_get(tp, head->hgenerator));
+
+		if (i <= 0) {
+			printk(KERN_ERR "Insufficient number of handles\n");
+			goto errout;
+		}
+
+		f->handle = head->hgenerator;
+	}
+
+	err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]);
+	if (err < 0)
+		goto errout;
+
+	tcf_tree_lock(tp);
+	list_add(&f->link, &head->flist);
+	tcf_tree_unlock(tp);
+	*arg = (unsigned long) f;
+
+	return 0;
+errout:
+	if (*arg == 0UL && f)
+		kfree(f);
+
+	return err;
+}
+
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (arg->count < arg->skip)
+			goto skip;
+
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+static int basic_dump(struct tcf_proto *tp, unsigned long fh,
+		      struct sk_buff *skb, struct tcmsg *t)
+{
+	struct basic_filter *f = (struct basic_filter *) fh;
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
+	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = (skb->tail - b);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_basic_ops = {
+	.kind		=	"basic",
+	.classify	=	basic_classify,
+	.init		=	basic_init,
+	.destroy	=	basic_destroy,
+	.get		=	basic_get,
+	.put		=	basic_put,
+	.change		=	basic_change,
+	.delete		=	basic_delete,
+	.walk		=	basic_walk,
+	.dump		=	basic_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_basic(void)
+{
+	return register_tcf_proto_ops(&cls_basic_ops);
+}
+
+static void __exit exit_basic(void) 
+{
+	unregister_tcf_proto_ops(&cls_basic_ops);
+}
+
+module_init(init_basic)
+module_exit(exit_basic)
+MODULE_LICENSE("GPL");
+
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -0,0 +1,378 @@
+/*
+ * net/sched/cls_fw.c	Classifier mapping ipchains' fwmark to traffic class.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
+ * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel).
+ * Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension
+ *
+ * JHS: We should remove the CONFIG_NET_CLS_IND from here
+ * eventually when the meta match extension is made available
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/netfilter.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct fw_head
+{
+	struct fw_filter *ht[256];
+};
+
+struct fw_filter
+{
+	struct fw_filter	*next;
+	u32			id;
+	struct tcf_result	res;
+#ifdef CONFIG_NET_CLS_IND
+	char			indev[IFNAMSIZ];
+#endif /* CONFIG_NET_CLS_IND */
+	struct tcf_exts		exts;
+};
+
+static struct tcf_ext_map fw_ext_map = {
+	.action = TCA_FW_ACT,
+	.police = TCA_FW_POLICE
+};
+
+static __inline__ int fw_hash(u32 handle)
+{
+	return handle&0xFF;
+}
+
+static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_filter *f;
+	int r;
+#ifdef CONFIG_NETFILTER
+	u32 id = skb->nfmark;
+#else
+	u32 id = 0;
+#endif
+
+	if (head != NULL) {
+		for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+			if (f->id == id) {
+				*res = f->res;
+#ifdef CONFIG_NET_CLS_IND
+				if (!tcf_match_indev(skb, f->indev))
+					continue;
+#endif /* CONFIG_NET_CLS_IND */
+				r = tcf_exts_exec(skb, &f->exts, res);
+				if (r < 0)
+					continue;
+
+				return r;
+			}
+		}
+	} else {
+		/* old method */
+		if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) {
+			res->classid = id;
+			res->class = 0;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_filter *f;
+
+	if (head == NULL)
+		return 0;
+
+	for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+		if (f->id == handle)
+			return (unsigned long)f;
+	}
+	return 0;
+}
+
+static void fw_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int fw_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static inline void
+fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	kfree(f);
+}
+
+static void fw_destroy(struct tcf_proto *tp)
+{
+	struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL);
+	struct fw_filter *f;
+	int h;
+
+	if (head == NULL)
+		return;
+
+	for (h=0; h<256; h++) {
+		while ((f=head->ht[h]) != NULL) {
+			head->ht[h] = f->next;
+			fw_delete_filter(tp, f);
+		}
+	}
+	kfree(head);
+}
+
+static int fw_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_filter *f = (struct fw_filter*)arg;
+	struct fw_filter **fp;
+
+	if (head == NULL || f == NULL)
+		goto out;
+
+	for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+		if (*fp == f) {
+			tcf_tree_lock(tp);
+			*fp = f->next;
+			tcf_tree_unlock(tp);
+			fw_delete_filter(tp, f);
+			return 0;
+		}
+	}
+out:
+	return -EINVAL;
+}
+
+static int
+fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
+	struct rtattr **tb, struct rtattr **tca, unsigned long base)
+{
+	struct tcf_exts e;
+	int err;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (tb[TCA_FW_CLASSID-1]) {
+		if (RTA_PAYLOAD(tb[TCA_FW_CLASSID-1]) != sizeof(u32))
+			goto errout;
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+#ifdef CONFIG_NET_CLS_IND
+	if (tb[TCA_FW_INDEV-1]) {
+		err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV-1]);
+		if (err < 0)
+			goto errout;
+	}
+#endif /* CONFIG_NET_CLS_IND */
+
+	tcf_exts_change(tp, &f->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int fw_change(struct tcf_proto *tp, unsigned long base,
+		     u32 handle,
+		     struct rtattr **tca,
+		     unsigned long *arg)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_filter *f = (struct fw_filter *) *arg;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_FW_MAX];
+	int err;
+
+	if (!opt)
+		return handle ? -EINVAL : 0;
+
+	if (rtattr_parse_nested(tb, TCA_FW_MAX, opt) < 0)
+		return -EINVAL;
+
+	if (f != NULL) {
+		if (f->id != handle && handle)
+			return -EINVAL;
+		return fw_change_attrs(tp, f, tb, tca, base);
+	}
+
+	if (!handle)
+		return -EINVAL;
+
+	if (head == NULL) {
+		head = kmalloc(sizeof(struct fw_head), GFP_KERNEL);
+		if (head == NULL)
+			return -ENOBUFS;
+		memset(head, 0, sizeof(*head));
+
+		tcf_tree_lock(tp);
+		tp->root = head;
+		tcf_tree_unlock(tp);
+	}
+
+	f = kmalloc(sizeof(struct fw_filter), GFP_KERNEL);
+	if (f == NULL)
+		return -ENOBUFS;
+	memset(f, 0, sizeof(*f));
+
+	f->id = handle;
+
+	err = fw_change_attrs(tp, f, tb, tca, base);
+	if (err < 0)
+		goto errout;
+
+	f->next = head->ht[fw_hash(handle)];
+	tcf_tree_lock(tp);
+	head->ht[fw_hash(handle)] = f;
+	tcf_tree_unlock(tp);
+
+	*arg = (unsigned long)f;
+	return 0;
+
+errout:
+	if (f)
+		kfree(f);
+	return err;
+}
+
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	int h;
+
+	if (head == NULL)
+		arg->stop = 1;
+
+	if (arg->stop)
+		return;
+
+	for (h = 0; h < 256; h++) {
+		struct fw_filter *f;
+
+		for (f = head->ht[h]; f; f = f->next) {
+			if (arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static int fw_dump(struct tcf_proto *tp, unsigned long fh,
+		   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct fw_filter *f = (struct fw_filter*)fh;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->id;
+
+	if (!f->res.classid && !tcf_exts_is_available(&f->exts))
+		return skb->len;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (f->res.classid)
+		RTA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
+#ifdef CONFIG_NET_CLS_IND
+	if (strlen(f->indev))
+		RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
+#endif /* CONFIG_NET_CLS_IND */
+
+	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = skb->tail - b;
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
+		goto rtattr_failure;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_fw_ops = {
+	.next		=	NULL,
+	.kind		=	"fw",
+	.classify	=	fw_classify,
+	.init		=	fw_init,
+	.destroy	=	fw_destroy,
+	.get		=	fw_get,
+	.put		=	fw_put,
+	.change		=	fw_change,
+	.delete		=	fw_delete,
+	.walk		=	fw_walk,
+	.dump		=	fw_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_fw(void)
+{
+	return register_tcf_proto_ops(&cls_fw_ops);
+}
+
+static void __exit exit_fw(void) 
+{
+	unregister_tcf_proto_ops(&cls_fw_ops);
+}
+
+module_init(init_fw)
+module_exit(exit_fw)
+MODULE_LICENSE("GPL");
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -0,0 +1,639 @@
+/*
+ * net/sched/cls_route.c	ROUTE4 classifier.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+/*
+   1. For now we assume that route tags < 256.
+      It allows to use direct table lookups, instead of hash tables.
+   2. For now we assume that "from TAG" and "fromdev DEV" statements
+      are mutually  exclusive.
+   3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
+ */
+
+struct route4_fastmap
+{
+	struct route4_filter	*filter;
+	u32			id;
+	int			iif;
+};
+
+struct route4_head
+{
+	struct route4_fastmap	fastmap[16];
+	struct route4_bucket	*table[256+1];
+};
+
+struct route4_bucket
+{
+	/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
+	struct route4_filter	*ht[16+16+1];
+};
+
+struct route4_filter
+{
+	struct route4_filter	*next;
+	u32			id;
+	int			iif;
+
+	struct tcf_result	res;
+	struct tcf_exts		exts;
+	u32			handle;
+	struct route4_bucket	*bkt;
+};
+
+#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+
+static struct tcf_ext_map route_ext_map = {
+	.police = TCA_ROUTE4_POLICE,
+	.action = TCA_ROUTE4_ACT
+};
+
+static __inline__ int route4_fastmap_hash(u32 id, int iif)
+{
+	return id&0xF;
+}
+
+static inline
+void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
+{
+	spin_lock_bh(&dev->queue_lock);
+	memset(head->fastmap, 0, sizeof(head->fastmap));
+	spin_unlock_bh(&dev->queue_lock);
+}
+
+static void __inline__
+route4_set_fastmap(struct route4_head *head, u32 id, int iif,
+		   struct route4_filter *f)
+{
+	int h = route4_fastmap_hash(id, iif);
+	head->fastmap[h].id = id;
+	head->fastmap[h].iif = iif;
+	head->fastmap[h].filter = f;
+}
+
+static __inline__ int route4_hash_to(u32 id)
+{
+	return id&0xFF;
+}
+
+static __inline__ int route4_hash_from(u32 id)
+{
+	return (id>>16)&0xF;
+}
+
+static __inline__ int route4_hash_iif(int iif)
+{
+	return 16 + ((iif>>16)&0xF);
+}
+
+static __inline__ int route4_hash_wild(void)
+{
+	return 32;
+}
+
+#define ROUTE4_APPLY_RESULT()					\
+{								\
+	*res = f->res;						\
+	if (tcf_exts_is_available(&f->exts)) {			\
+		int r = tcf_exts_exec(skb, &f->exts, res);	\
+		if (r < 0) {					\
+			dont_cache = 1;				\
+			continue;				\
+		}						\
+		return r;					\
+	} else if (!dont_cache)					\
+		route4_set_fastmap(head, id, iif, f);		\
+	return 0;						\
+}
+
+static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			   struct tcf_result *res)
+{
+	struct route4_head *head = (struct route4_head*)tp->root;
+	struct dst_entry *dst;
+	struct route4_bucket *b;
+	struct route4_filter *f;
+	u32 id, h;
+	int iif, dont_cache = 0;
+
+	if ((dst = skb->dst) == NULL)
+		goto failure;
+
+	id = dst->tclassid;
+	if (head == NULL)
+		goto old_method;
+
+	iif = ((struct rtable*)dst)->fl.iif;
+
+	h = route4_fastmap_hash(id, iif);
+	if (id == head->fastmap[h].id &&
+	    iif == head->fastmap[h].iif &&
+	    (f = head->fastmap[h].filter) != NULL) {
+		if (f == ROUTE4_FAILURE)
+			goto failure;
+
+		*res = f->res;
+		return 0;
+	}
+
+	h = route4_hash_to(id);
+
+restart:
+	if ((b = head->table[h]) != NULL) {
+		for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
+			if (f->id == id)
+				ROUTE4_APPLY_RESULT();
+
+		for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next)
+			if (f->iif == iif)
+				ROUTE4_APPLY_RESULT();
+
+		for (f = b->ht[route4_hash_wild()]; f; f = f->next)
+			ROUTE4_APPLY_RESULT();
+
+	}
+	if (h < 256) {
+		h = 256;
+		id &= ~0xFFFF;
+		goto restart;
+	}
+
+	if (!dont_cache)
+		route4_set_fastmap(head, id, iif, ROUTE4_FAILURE);
+failure:
+	return -1;
+
+old_method:
+	if (id && (TC_H_MAJ(id) == 0 ||
+		   !(TC_H_MAJ(id^tp->q->handle)))) {
+		res->classid = id;
+		res->class = 0;
+		return 0;
+	}
+	return -1;
+}
+
+static inline u32 to_hash(u32 id)
+{
+	u32 h = id&0xFF;
+	if (id&0x8000)
+		h += 256;
+	return h;
+}
+
+static inline u32 from_hash(u32 id)
+{
+	id &= 0xFFFF;
+	if (id == 0xFFFF)
+		return 32;
+	if (!(id & 0x8000)) {
+		if (id > 255)
+			return 256;
+		return id&0xF;
+	}
+	return 16 + (id&0xF);
+}
+
+static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
+{
+	struct route4_head *head = (struct route4_head*)tp->root;
+	struct route4_bucket *b;
+	struct route4_filter *f;
+	unsigned h1, h2;
+
+	if (!head)
+		return 0;
+
+	h1 = to_hash(handle);
+	if (h1 > 256)
+		return 0;
+
+	h2 = from_hash(handle>>16);
+	if (h2 > 32)
+		return 0;
+
+	if ((b = head->table[h1]) != NULL) {
+		for (f = b->ht[h2]; f; f = f->next)
+			if (f->handle == handle)
+				return (unsigned long)f;
+	}
+	return 0;
+}
+
+static void route4_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int route4_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static inline void
+route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	kfree(f);
+}
+
+static void route4_destroy(struct tcf_proto *tp)
+{
+	struct route4_head *head = xchg(&tp->root, NULL);
+	int h1, h2;
+
+	if (head == NULL)
+		return;
+
+	for (h1=0; h1<=256; h1++) {
+		struct route4_bucket *b;
+
+		if ((b = head->table[h1]) != NULL) {
+			for (h2=0; h2<=32; h2++) {
+				struct route4_filter *f;
+
+				while ((f = b->ht[h2]) != NULL) {
+					b->ht[h2] = f->next;
+					route4_delete_filter(tp, f);
+				}
+			}
+			kfree(b);
+		}
+	}
+	kfree(head);
+}
+
+static int route4_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct route4_head *head = (struct route4_head*)tp->root;
+	struct route4_filter **fp, *f = (struct route4_filter*)arg;
+	unsigned h = 0;
+	struct route4_bucket *b;
+	int i;
+
+	if (!head || !f)
+		return -EINVAL;
+
+	h = f->handle;
+	b = f->bkt;
+
+	for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+		if (*fp == f) {
+			tcf_tree_lock(tp);
+			*fp = f->next;
+			tcf_tree_unlock(tp);
+
+			route4_reset_fastmap(tp->q->dev, head, f->id);
+			route4_delete_filter(tp, f);
+
+			/* Strip tree */
+
+			for (i=0; i<=32; i++)
+				if (b->ht[i])
+					return 0;
+
+			/* OK, session has no flows */
+			tcf_tree_lock(tp);
+			head->table[to_hash(h)] = NULL;
+			tcf_tree_unlock(tp);
+
+			kfree(b);
+			return 0;
+		}
+	}
+	return 0;
+}
+
+static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
+	struct route4_filter *f, u32 handle, struct route4_head *head,
+	struct rtattr **tb, struct rtattr *est, int new)
+{
+	int err;
+	u32 id = 0, to = 0, nhandle = 0x8000;
+	struct route4_filter *fp;
+	unsigned int h1;
+	struct route4_bucket *b;
+	struct tcf_exts e;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &route_ext_map);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (tb[TCA_ROUTE4_CLASSID-1])
+		if (RTA_PAYLOAD(tb[TCA_ROUTE4_CLASSID-1]) < sizeof(u32))
+			goto errout;
+
+	if (tb[TCA_ROUTE4_TO-1]) {
+		if (new && handle & 0x8000)
+			goto errout;
+		if (RTA_PAYLOAD(tb[TCA_ROUTE4_TO-1]) < sizeof(u32))
+			goto errout;
+		to = *(u32*)RTA_DATA(tb[TCA_ROUTE4_TO-1]);
+		if (to > 0xFF)
+			goto errout;
+		nhandle = to;
+	}
+
+	if (tb[TCA_ROUTE4_FROM-1]) {
+		if (tb[TCA_ROUTE4_IIF-1])
+			goto errout;
+		if (RTA_PAYLOAD(tb[TCA_ROUTE4_FROM-1]) < sizeof(u32))
+			goto errout;
+		id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_FROM-1]);
+		if (id > 0xFF)
+			goto errout;
+		nhandle |= id << 16;
+	} else if (tb[TCA_ROUTE4_IIF-1]) {
+		if (RTA_PAYLOAD(tb[TCA_ROUTE4_IIF-1]) < sizeof(u32))
+			goto errout;
+		id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]);
+		if (id > 0x7FFF)
+			goto errout;
+		nhandle |= (id | 0x8000) << 16;
+	} else
+		nhandle |= 0xFFFF << 16;
+
+	if (handle && new) {
+		nhandle |= handle & 0x7F00;
+		if (nhandle != handle)
+			goto errout;
+	}
+
+	h1 = to_hash(nhandle);
+	if ((b = head->table[h1]) == NULL) {
+		err = -ENOBUFS;
+		b = kmalloc(sizeof(struct route4_bucket), GFP_KERNEL);
+		if (b == NULL)
+			goto errout;
+		memset(b, 0, sizeof(*b));
+
+		tcf_tree_lock(tp);
+		head->table[h1] = b;
+		tcf_tree_unlock(tp);
+	} else {
+		unsigned int h2 = from_hash(nhandle >> 16);
+		err = -EEXIST;
+		for (fp = b->ht[h2]; fp; fp = fp->next)
+			if (fp->handle == f->handle)
+				goto errout;
+	}
+
+	tcf_tree_lock(tp);
+	if (tb[TCA_ROUTE4_TO-1])
+		f->id = to;
+
+	if (tb[TCA_ROUTE4_FROM-1])
+		f->id = to | id<<16;
+	else if (tb[TCA_ROUTE4_IIF-1])
+		f->iif = id;
+
+	f->handle = nhandle;
+	f->bkt = b;
+	tcf_tree_unlock(tp);
+
+	if (tb[TCA_ROUTE4_CLASSID-1]) {
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	tcf_exts_change(tp, &f->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int route4_change(struct tcf_proto *tp, unsigned long base,
+		       u32 handle,
+		       struct rtattr **tca,
+		       unsigned long *arg)
+{
+	struct route4_head *head = tp->root;
+	struct route4_filter *f, *f1, **fp;
+	struct route4_bucket *b;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_ROUTE4_MAX];
+	unsigned int h, th;
+	u32 old_handle = 0;
+	int err;
+
+	if (opt == NULL)
+		return handle ? -EINVAL : 0;
+
+	if (rtattr_parse_nested(tb, TCA_ROUTE4_MAX, opt) < 0)
+		return -EINVAL;
+
+	if ((f = (struct route4_filter*)*arg) != NULL) {
+		if (f->handle != handle && handle)
+			return -EINVAL;
+
+		if (f->bkt)
+			old_handle = f->handle;
+
+		err = route4_set_parms(tp, base, f, handle, head, tb,
+			tca[TCA_RATE-1], 0);
+		if (err < 0)
+			return err;
+
+		goto reinsert;
+	}
+
+	err = -ENOBUFS;
+	if (head == NULL) {
+		head = kmalloc(sizeof(struct route4_head), GFP_KERNEL);
+		if (head == NULL)
+			goto errout;
+		memset(head, 0, sizeof(struct route4_head));
+
+		tcf_tree_lock(tp);
+		tp->root = head;
+		tcf_tree_unlock(tp);
+	}
+
+	f = kmalloc(sizeof(struct route4_filter), GFP_KERNEL);
+	if (f == NULL)
+		goto errout;
+	memset(f, 0, sizeof(*f));
+
+	err = route4_set_parms(tp, base, f, handle, head, tb,
+		tca[TCA_RATE-1], 1);
+	if (err < 0)
+		goto errout;
+
+reinsert:
+	h = from_hash(f->handle >> 16);
+	for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next)
+		if (f->handle < f1->handle)
+			break;
+
+	f->next = f1;
+	tcf_tree_lock(tp);
+	*fp = f;
+
+	if (old_handle && f->handle != old_handle) {
+		th = to_hash(old_handle);
+		h = from_hash(old_handle >> 16);
+		if ((b = head->table[th]) != NULL) {
+			for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
+				if (*fp == f) {
+					*fp = f->next;
+					break;
+				}
+			}
+		}
+	}
+	tcf_tree_unlock(tp);
+
+	route4_reset_fastmap(tp->q->dev, head, f->id);
+	*arg = (unsigned long)f;
+	return 0;
+
+errout:
+	if (f)
+		kfree(f);
+	return err;
+}
+
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct route4_head *head = tp->root;
+	unsigned h, h1;
+
+	if (head == NULL)
+		arg->stop = 1;
+
+	if (arg->stop)
+		return;
+
+	for (h = 0; h <= 256; h++) {
+		struct route4_bucket *b = head->table[h];
+
+		if (b) {
+			for (h1 = 0; h1 <= 32; h1++) {
+				struct route4_filter *f;
+
+				for (f = b->ht[h1]; f; f = f->next) {
+					if (arg->count < arg->skip) {
+						arg->count++;
+						continue;
+					}
+					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+						arg->stop = 1;
+						return;
+					}
+					arg->count++;
+				}
+			}
+		}
+	}
+}
+
+static int route4_dump(struct tcf_proto *tp, unsigned long fh,
+		       struct sk_buff *skb, struct tcmsg *t)
+{
+	struct route4_filter *f = (struct route4_filter*)fh;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	u32 id;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (!(f->handle&0x8000)) {
+		id = f->id&0xFF;
+		RTA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id);
+	}
+	if (f->handle&0x80000000) {
+		if ((f->handle>>16) != 0xFFFF)
+			RTA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif);
+	} else {
+		id = f->id>>16;
+		RTA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id);
+	}
+	if (f->res.classid)
+		RTA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid);
+
+	if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = skb->tail - b;
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
+		goto rtattr_failure;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_route4_ops = {
+	.next		=	NULL,
+	.kind		=	"route",
+	.classify	=	route4_classify,
+	.init		=	route4_init,
+	.destroy	=	route4_destroy,
+	.get		=	route4_get,
+	.put		=	route4_put,
+	.change		=	route4_change,
+	.delete		=	route4_delete,
+	.walk		=	route4_walk,
+	.dump		=	route4_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_route4(void)
+{
+	return register_tcf_proto_ops(&cls_route4_ops);
+}
+
+static void __exit exit_route4(void)
+{
+	unregister_tcf_proto_ops(&cls_route4_ops);
+}
+
+module_init(init_route4)
+module_exit(exit_route4)
+MODULE_LICENSE("GPL");
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -0,0 +1,43 @@
+/*
+ * net/sched/cls_rsvp.c	Special RSVP packet classifier for IPv4.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+#define RSVP_DST_LEN	1
+#define RSVP_ID		"rsvp"
+#define RSVP_OPS	cls_rsvp_ops
+
+#include "cls_rsvp.h"
+MODULE_LICENSE("GPL");
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -0,0 +1,667 @@
+/*
+ * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+/*
+   Comparing to general packet classification problem,
+   RSVP needs only sevaral relatively simple rules:
+
+   * (dst, protocol) are always specified,
+     so that we are able to hash them.
+   * src may be exact, or may be wildcard, so that
+     we can keep a hash table plus one wildcard entry.
+   * source port (or flow label) is important only if src is given.
+
+   IMPLEMENTATION.
+
+   We use a two level hash table: The top level is keyed by
+   destination address and protocol ID, every bucket contains a list
+   of "rsvp sessions", identified by destination address, protocol and
+   DPI(="Destination Port ID"): triple (key, mask, offset).
+
+   Every bucket has a smaller hash table keyed by source address
+   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
+   Every bucket is again a list of "RSVP flows", selected by
+   source address and SPI(="Source Port ID" here rather than
+   "security parameter index"): triple (key, mask, offset).
+
+
+   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
+   and all fragmented packets go to the best-effort traffic class.
+
+
+   NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
+   only one "Generalized Port Identifier". So that for classic
+   ah, esp (and udp,tcp) both *pi should coincide or one of them
+   should be wildcard.
+
+   At first sight, this redundancy is just a waste of CPU
+   resources. But DPI and SPI add the possibility to assign different
+   priorities to GPIs. Look also at note 4 about tunnels below.
+
+
+   NOTE 3. One complication is the case of tunneled packets.
+   We implement it as following: if the first lookup
+   matches a special session with "tunnelhdr" value not zero,
+   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
+   In this case, we pull tunnelhdr bytes and restart lookup
+   with tunnel ID added to the list of keys. Simple and stupid 8)8)
+   It's enough for PIMREG and IPIP.
+
+
+   NOTE 4. Two GPIs make it possible to parse even GRE packets.
+   F.e. DPI can select ETH_P_IP (and necessary flags to make
+   tunnelhdr correct) in GRE protocol field and SPI matches
+   GRE key. Is it not nice? 8)8)
+
+
+   Well, as result, despite its simplicity, we get a pretty
+   powerful classification engine.  */
+
+#include <linux/config.h>
+
+struct rsvp_head
+{
+	u32			tmap[256/32];
+	u32			hgenerator;
+	u8			tgenerator;
+	struct rsvp_session	*ht[256];
+};
+
+struct rsvp_session
+{
+	struct rsvp_session	*next;
+	u32			dst[RSVP_DST_LEN];
+	struct tc_rsvp_gpi 	dpi;
+	u8			protocol;
+	u8			tunnelid;
+	/* 16 (src,sport) hash slots, and one wildcard source slot */
+	struct rsvp_filter	*ht[16+1];
+};
+
+
+struct rsvp_filter
+{
+	struct rsvp_filter	*next;
+	u32			src[RSVP_DST_LEN];
+	struct tc_rsvp_gpi	spi;
+	u8			tunnelhdr;
+
+	struct tcf_result	res;
+	struct tcf_exts		exts;
+
+	u32			handle;
+	struct rsvp_session	*sess;
+};
+
+static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
+{
+	unsigned h = dst[RSVP_DST_LEN-1];
+	h ^= h>>16;
+	h ^= h>>8;
+	return (h ^ protocol ^ tunnelid) & 0xFF;
+}
+
+static __inline__ unsigned hash_src(u32 *src)
+{
+	unsigned h = src[RSVP_DST_LEN-1];
+	h ^= h>>16;
+	h ^= h>>8;
+	h ^= h>>4;
+	return h & 0xF;
+}
+
+static struct tcf_ext_map rsvp_ext_map = {
+	.police = TCA_RSVP_POLICE,
+	.action = TCA_RSVP_ACT
+};
+
+#define RSVP_APPLY_RESULT()				\
+{							\
+	int r = tcf_exts_exec(skb, &f->exts, res);	\
+	if (r < 0)					\
+		continue;				\
+	else if (r > 0)					\
+		return r;				\
+}
+	
+static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			 struct tcf_result *res)
+{
+	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+	struct rsvp_session *s;
+	struct rsvp_filter *f;
+	unsigned h1, h2;
+	u32 *dst, *src;
+	u8 protocol;
+	u8 tunnelid = 0;
+	u8 *xprt;
+#if RSVP_DST_LEN == 4
+	struct ipv6hdr *nhptr = skb->nh.ipv6h;
+#else
+	struct iphdr *nhptr = skb->nh.iph;
+#endif
+
+restart:
+
+#if RSVP_DST_LEN == 4
+	src = &nhptr->saddr.s6_addr32[0];
+	dst = &nhptr->daddr.s6_addr32[0];
+	protocol = nhptr->nexthdr;
+	xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
+#else
+	src = &nhptr->saddr;
+	dst = &nhptr->daddr;
+	protocol = nhptr->protocol;
+	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
+	if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
+		return -1;
+#endif
+
+	h1 = hash_dst(dst, protocol, tunnelid);
+	h2 = hash_src(src);
+
+	for (s = sht[h1]; s; s = s->next) {
+		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+		    protocol == s->protocol &&
+		    !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
+#if RSVP_DST_LEN == 4
+		    && dst[0] == s->dst[0]
+		    && dst[1] == s->dst[1]
+		    && dst[2] == s->dst[2]
+#endif
+		    && tunnelid == s->tunnelid) {
+
+			for (f = s->ht[h2]; f; f = f->next) {
+				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
+				    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
+#if RSVP_DST_LEN == 4
+				    && src[0] == f->src[0]
+				    && src[1] == f->src[1]
+				    && src[2] == f->src[2]
+#endif
+				    ) {
+					*res = f->res;
+					RSVP_APPLY_RESULT();
+
+matched:
+					if (f->tunnelhdr == 0)
+						return 0;
+
+					tunnelid = f->res.classid;
+					nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+					goto restart;
+				}
+			}
+
+			/* And wildcard bucket... */
+			for (f = s->ht[16]; f; f = f->next) {
+				*res = f->res;
+				RSVP_APPLY_RESULT();
+				goto matched;
+			}
+			return -1;
+		}
+	}
+	return -1;
+}
+
+static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
+{
+	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+	struct rsvp_session *s;
+	struct rsvp_filter *f;
+	unsigned h1 = handle&0xFF;
+	unsigned h2 = (handle>>8)&0xFF;
+
+	if (h2 > 16)
+		return 0;
+
+	for (s = sht[h1]; s; s = s->next) {
+		for (f = s->ht[h2]; f; f = f->next) {
+			if (f->handle == handle)
+				return (unsigned long)f;
+		}
+	}
+	return 0;
+}
+
+static void rsvp_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int rsvp_init(struct tcf_proto *tp)
+{
+	struct rsvp_head *data;
+
+	data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
+	if (data) {
+		memset(data, 0, sizeof(struct rsvp_head));
+		tp->root = data;
+		return 0;
+	}
+	return -ENOBUFS;
+}
+
+static inline void
+rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	kfree(f);
+}
+
+static void rsvp_destroy(struct tcf_proto *tp)
+{
+	struct rsvp_head *data = xchg(&tp->root, NULL);
+	struct rsvp_session **sht;
+	int h1, h2;
+
+	if (data == NULL)
+		return;
+
+	sht = data->ht;
+
+	for (h1=0; h1<256; h1++) {
+		struct rsvp_session *s;
+
+		while ((s = sht[h1]) != NULL) {
+			sht[h1] = s->next;
+
+			for (h2=0; h2<=16; h2++) {
+				struct rsvp_filter *f;
+
+				while ((f = s->ht[h2]) != NULL) {
+					s->ht[h2] = f->next;
+					rsvp_delete_filter(tp, f);
+				}
+			}
+			kfree(s);
+		}
+	}
+	kfree(data);
+}
+
+static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
+	unsigned h = f->handle;
+	struct rsvp_session **sp;
+	struct rsvp_session *s = f->sess;
+	int i;
+
+	for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
+		if (*fp == f) {
+			tcf_tree_lock(tp);
+			*fp = f->next;
+			tcf_tree_unlock(tp);
+			rsvp_delete_filter(tp, f);
+
+			/* Strip tree */
+
+			for (i=0; i<=16; i++)
+				if (s->ht[i])
+					return 0;
+
+			/* OK, session has no flows */
+			for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
+			     *sp; sp = &(*sp)->next) {
+				if (*sp == s) {
+					tcf_tree_lock(tp);
+					*sp = s->next;
+					tcf_tree_unlock(tp);
+
+					kfree(s);
+					return 0;
+				}
+			}
+
+			return 0;
+		}
+	}
+	return 0;
+}
+
+static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
+{
+	struct rsvp_head *data = tp->root;
+	int i = 0xFFFF;
+
+	while (i-- > 0) {
+		u32 h;
+		if ((data->hgenerator += 0x10000) == 0)
+			data->hgenerator = 0x10000;
+		h = data->hgenerator|salt;
+		if (rsvp_get(tp, h) == 0)
+			return h;
+	}
+	return 0;
+}
+
+static int tunnel_bts(struct rsvp_head *data)
+{
+	int n = data->tgenerator>>5;
+	u32 b = 1<<(data->tgenerator&0x1F);
+	
+	if (data->tmap[n]&b)
+		return 0;
+	data->tmap[n] |= b;
+	return 1;
+}
+
+static void tunnel_recycle(struct rsvp_head *data)
+{
+	struct rsvp_session **sht = data->ht;
+	u32 tmap[256/32];
+	int h1, h2;
+
+	memset(tmap, 0, sizeof(tmap));
+
+	for (h1=0; h1<256; h1++) {
+		struct rsvp_session *s;
+		for (s = sht[h1]; s; s = s->next) {
+			for (h2=0; h2<=16; h2++) {
+				struct rsvp_filter *f;
+
+				for (f = s->ht[h2]; f; f = f->next) {
+					if (f->tunnelhdr == 0)
+						continue;
+					data->tgenerator = f->res.classid;
+					tunnel_bts(data);
+				}
+			}
+		}
+	}
+
+	memcpy(data->tmap, tmap, sizeof(tmap));
+}
+
+static u32 gen_tunnel(struct rsvp_head *data)
+{
+	int i, k;
+
+	for (k=0; k<2; k++) {
+		for (i=255; i>0; i--) {
+			if (++data->tgenerator == 0)
+				data->tgenerator = 1;
+			if (tunnel_bts(data))
+				return data->tgenerator;
+		}
+		tunnel_recycle(data);
+	}
+	return 0;
+}
+
+static int rsvp_change(struct tcf_proto *tp, unsigned long base,
+		       u32 handle,
+		       struct rtattr **tca,
+		       unsigned long *arg)
+{
+	struct rsvp_head *data = tp->root;
+	struct rsvp_filter *f, **fp;
+	struct rsvp_session *s, **sp;
+	struct tc_rsvp_pinfo *pinfo = NULL;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_RSVP_MAX];
+	struct tcf_exts e;
+	unsigned h1, h2;
+	u32 *dst;
+	int err;
+
+	if (opt == NULL)
+		return handle ? -EINVAL : 0;
+
+	if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0)
+		return -EINVAL;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	if (err < 0)
+		return err;
+
+	if ((f = (struct rsvp_filter*)*arg) != NULL) {
+		/* Node exists: adjust only classid */
+
+		if (f->handle != handle && handle)
+			goto errout2;
+		if (tb[TCA_RSVP_CLASSID-1]) {
+			f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
+			tcf_bind_filter(tp, &f->res, base);
+		}
+
+		tcf_exts_change(tp, &f->exts, &e);
+		return 0;
+	}
+
+	/* Now more serious part... */
+	err = -EINVAL;
+	if (handle)
+		goto errout2;
+	if (tb[TCA_RSVP_DST-1] == NULL)
+		goto errout2;
+
+	err = -ENOBUFS;
+	f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
+	if (f == NULL)
+		goto errout2;
+
+	memset(f, 0, sizeof(*f));
+	h2 = 16;
+	if (tb[TCA_RSVP_SRC-1]) {
+		err = -EINVAL;
+		if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
+			goto errout;
+		memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+		h2 = hash_src(f->src);
+	}
+	if (tb[TCA_RSVP_PINFO-1]) {
+		err = -EINVAL;
+		if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
+			goto errout;
+		pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
+		f->spi = pinfo->spi;
+		f->tunnelhdr = pinfo->tunnelhdr;
+	}
+	if (tb[TCA_RSVP_CLASSID-1]) {
+		err = -EINVAL;
+		if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
+			goto errout;
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
+	}
+
+	err = -EINVAL;
+	if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
+		goto errout;
+	dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
+	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
+
+	err = -ENOMEM;
+	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
+		goto errout;
+
+	if (f->tunnelhdr) {
+		err = -EINVAL;
+		if (f->res.classid > 255)
+			goto errout;
+
+		err = -ENOMEM;
+		if (f->res.classid == 0 &&
+		    (f->res.classid = gen_tunnel(data)) == 0)
+			goto errout;
+	}
+
+	for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
+		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+		    pinfo && pinfo->protocol == s->protocol &&
+		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
+#if RSVP_DST_LEN == 4
+		    && dst[0] == s->dst[0]
+		    && dst[1] == s->dst[1]
+		    && dst[2] == s->dst[2]
+#endif
+		    && pinfo->tunnelid == s->tunnelid) {
+
+insert:
+			/* OK, we found appropriate session */
+
+			fp = &s->ht[h2];
+
+			f->sess = s;
+			if (f->tunnelhdr == 0)
+				tcf_bind_filter(tp, &f->res, base);
+
+			tcf_exts_change(tp, &f->exts, &e);
+
+			for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
+				if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
+					break;
+			f->next = *fp;
+			wmb();
+			*fp = f;
+
+			*arg = (unsigned long)f;
+			return 0;
+		}
+	}
+
+	/* No session found. Create new one. */
+
+	err = -ENOBUFS;
+	s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
+	if (s == NULL)
+		goto errout;
+	memset(s, 0, sizeof(*s));
+	memcpy(s->dst, dst, sizeof(s->dst));
+
+	if (pinfo) {
+		s->dpi = pinfo->dpi;
+		s->protocol = pinfo->protocol;
+		s->tunnelid = pinfo->tunnelid;
+	}
+	for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
+		if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
+			break;
+	}
+	s->next = *sp;
+	wmb();
+	*sp = s;
+	
+	goto insert;
+
+errout:
+	if (f)
+		kfree(f);
+errout2:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct rsvp_head *head = tp->root;
+	unsigned h, h1;
+
+	if (arg->stop)
+		return;
+
+	for (h = 0; h < 256; h++) {
+		struct rsvp_session *s;
+
+		for (s = head->ht[h]; s; s = s->next) {
+			for (h1 = 0; h1 <= 16; h1++) {
+				struct rsvp_filter *f;
+
+				for (f = s->ht[h1]; f; f = f->next) {
+					if (arg->count < arg->skip) {
+						arg->count++;
+						continue;
+					}
+					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+						arg->stop = 1;
+						return;
+					}
+					arg->count++;
+				}
+			}
+		}
+	}
+}
+
+static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
+		     struct sk_buff *skb, struct tcmsg *t)
+{
+	struct rsvp_filter *f = (struct rsvp_filter*)fh;
+	struct rsvp_session *s;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_rsvp_pinfo pinfo;
+
+	if (f == NULL)
+		return skb->len;
+	s = f->sess;
+
+	t->tcm_handle = f->handle;
+
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
+	pinfo.dpi = s->dpi;
+	pinfo.spi = f->spi;
+	pinfo.protocol = s->protocol;
+	pinfo.tunnelid = s->tunnelid;
+	pinfo.tunnelhdr = f->tunnelhdr;
+	RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
+	if (f->res.classid)
+		RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
+	if (((f->handle>>8)&0xFF) != 16)
+		RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
+
+	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = skb->tail - b;
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
+		goto rtattr_failure;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops RSVP_OPS = {
+	.next		=	NULL,
+	.kind		=	RSVP_ID,
+	.classify	=	rsvp_classify,
+	.init		=	rsvp_init,
+	.destroy	=	rsvp_destroy,
+	.get		=	rsvp_get,
+	.put		=	rsvp_put,
+	.change		=	rsvp_change,
+	.delete		=	rsvp_delete,
+	.walk		=	rsvp_walk,
+	.dump		=	rsvp_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_rsvp(void)
+{
+	return register_tcf_proto_ops(&RSVP_OPS);
+}
+
+static void __exit exit_rsvp(void) 
+{
+	unregister_tcf_proto_ops(&RSVP_OPS);
+}
+
+module_init(init_rsvp)
+module_exit(exit_rsvp)
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -0,0 +1,44 @@
+/*
+ * net/sched/cls_rsvp6.c	Special RSVP packet classifier for IPv6.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+#define RSVP_DST_LEN	4
+#define RSVP_ID		"rsvp6"
+#define RSVP_OPS	cls_rsvp6_ops
+
+#include "cls_rsvp.h"
+MODULE_LICENSE("GPL");
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -0,0 +1,537 @@
+/*
+ * net/sched/cls_tcindex.c	Packet classifier for skb->tc_index
+ *
+ * Written 1998,1999 by Werner Almesberger, EPFL ICA
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+#include <net/route.h>
+
+
+/*
+ * Not quite sure if we need all the xchgs Alexey uses when accessing things.
+ * Can always add them later ... :)
+ */
+
+/*
+ * Passing parameters to the root seems to be done more awkwardly than really
+ * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
+ * verified. FIXME.
+ */
+
+#define PERFECT_HASH_THRESHOLD	64	/* use perfect hash if not bigger */
+#define DEFAULT_HASH_SIZE	64	/* optimized for diffserv */
+
+
+#if 1 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0 /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+
+#define	PRIV(tp)	((struct tcindex_data *) (tp)->root)
+
+
+struct tcindex_filter_result {
+	struct tcf_exts		exts;
+	struct tcf_result	res;
+};
+
+struct tcindex_filter {
+	u16 key;
+	struct tcindex_filter_result result;
+	struct tcindex_filter *next;
+};
+
+
+struct tcindex_data {
+	struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
+	struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
+				      NULL if unused */
+	u16 mask;		/* AND key with mask */
+	int shift;		/* shift ANDed key to the right */
+	int hash;		/* hash table size; 0 if undefined */
+	int alloc_hash;		/* allocated size */
+	int fall_through;	/* 0: only classify if explicit match */
+};
+
+static struct tcf_ext_map tcindex_ext_map = {
+	.police = TCA_TCINDEX_POLICE,
+	.action = TCA_TCINDEX_ACT
+};
+
+static inline int
+tcindex_filter_is_set(struct tcindex_filter_result *r)
+{
+	return tcf_exts_is_predicative(&r->exts) || r->res.classid;
+}
+
+static struct tcindex_filter_result *
+tcindex_lookup(struct tcindex_data *p, u16 key)
+{
+	struct tcindex_filter *f;
+
+	if (p->perfect)
+		return tcindex_filter_is_set(p->perfect + key) ?
+			p->perfect + key : NULL;
+	else if (p->h) {
+		for (f = p->h[key % p->hash]; f; f = f->next)
+			if (f->key == key)
+				return &f->result;
+	}
+
+	return NULL;
+}
+
+
+static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			    struct tcf_result *res)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *f;
+	int key = (skb->tc_index & p->mask) >> p->shift;
+
+	D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p);
+
+	f = tcindex_lookup(p, key);
+	if (!f) {
+		if (!p->fall_through)
+			return -1;
+		res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
+		res->class = 0;
+		D2PRINTK("alg 0x%x\n",res->classid);
+		return 0;
+	}
+	*res = f->res;
+	D2PRINTK("map 0x%x\n",res->classid);
+
+	return tcf_exts_exec(skb, &f->exts, res);
+}
+
+
+static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r;
+
+	DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle);
+	if (p->perfect && handle >= p->alloc_hash)
+		return 0;
+	r = tcindex_lookup(p, handle);
+	return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
+}
+
+
+static void tcindex_put(struct tcf_proto *tp, unsigned long f)
+{
+	DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f);
+}
+
+
+static int tcindex_init(struct tcf_proto *tp)
+{
+	struct tcindex_data *p;
+
+	DPRINTK("tcindex_init(tp %p)\n",tp);
+	p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memset(p, 0, sizeof(*p));
+	p->mask = 0xffff;
+	p->hash = DEFAULT_HASH_SIZE;
+	p->fall_through = 1;
+
+	tp->root = p;
+	return 0;
+}
+
+
+static int
+__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+	struct tcindex_filter *f = NULL;
+
+	DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f);
+	if (p->perfect) {
+		if (!r->res.class)
+			return -ENOENT;
+	} else {
+		int i;
+		struct tcindex_filter **walk = NULL;
+
+		for (i = 0; i < p->hash; i++)
+			for (walk = p->h+i; *walk; walk = &(*walk)->next)
+				if (&(*walk)->result == r)
+					goto found;
+		return -ENOENT;
+
+found:
+		f = *walk;
+		if (lock)
+			tcf_tree_lock(tp);
+		*walk = f->next;
+		if (lock)
+			tcf_tree_unlock(tp);
+	}
+	tcf_unbind_filter(tp, &r->res);
+	tcf_exts_destroy(tp, &r->exts);
+	if (f)
+		kfree(f);
+	return 0;
+}
+
+static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	return __tcindex_delete(tp, arg, 1);
+}
+
+static inline int
+valid_perfect_hash(struct tcindex_data *p)
+{
+	return  p->hash > (p->mask >> p->shift);
+}
+
+static int
+tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
+		  struct tcindex_data *p, struct tcindex_filter_result *r,
+		  struct rtattr **tb, struct rtattr *est)
+{
+	int err, balloc = 0;
+	struct tcindex_filter_result new_filter_result, *old_r = r;
+	struct tcindex_filter_result cr;
+	struct tcindex_data cp;
+	struct tcindex_filter *f = NULL; /* make gcc behave */
+	struct tcf_exts e;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &tcindex_ext_map);
+	if (err < 0)
+		return err;
+	
+	memcpy(&cp, p, sizeof(cp));
+	memset(&new_filter_result, 0, sizeof(new_filter_result));
+
+	if (old_r)
+		memcpy(&cr, r, sizeof(cr));
+	else
+		memset(&cr, 0, sizeof(cr));
+
+	err = -EINVAL;
+	if (tb[TCA_TCINDEX_HASH-1]) {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(u32))
+			goto errout;
+		cp.hash = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]);
+	}
+
+	if (tb[TCA_TCINDEX_MASK-1]) {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(u16))
+			goto errout;
+		cp.mask = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
+	}
+
+	if (tb[TCA_TCINDEX_SHIFT-1]) {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(u16))
+			goto errout;
+		cp.shift = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
+	}
+
+	err = -EBUSY;
+	/* Hash already allocated, make sure that we still meet the
+	 * requirements for the allocated hash.
+	 */
+	if (cp.perfect) {
+		if (!valid_perfect_hash(&cp) ||
+		    cp.hash > cp.alloc_hash)
+			goto errout;
+	} else if (cp.h && cp.hash != cp.alloc_hash)
+		goto errout;
+
+	err = -EINVAL;
+	if (tb[TCA_TCINDEX_FALL_THROUGH-1]) {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(u32))
+			goto errout;
+		cp.fall_through =
+			*(u32 *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]);
+	}
+
+	if (!cp.hash) {
+		/* Hash not specified, use perfect hash if the upper limit
+		 * of the hashing index is below the threshold.
+		 */
+		if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
+			cp.hash = (cp.mask >> cp.shift)+1;
+		else
+			cp.hash = DEFAULT_HASH_SIZE;
+	}
+
+	if (!cp.perfect && !cp.h)
+		cp.alloc_hash = cp.hash;
+
+	/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
+	 * but then, we'd fail handles that may become valid after some future
+	 * mask change. While this is extremely unlikely to ever matter,
+	 * the check below is safer (and also more backwards-compatible).
+	 */
+	if (cp.perfect || valid_perfect_hash(&cp))
+		if (handle >= cp.alloc_hash)
+			goto errout;
+
+
+	err = -ENOMEM;
+	if (!cp.perfect && !cp.h) {
+		if (valid_perfect_hash(&cp)) {
+			cp.perfect = kmalloc(cp.hash * sizeof(*r), GFP_KERNEL);
+			if (!cp.perfect)
+				goto errout;
+			memset(cp.perfect, 0, cp.hash * sizeof(*r));
+			balloc = 1;
+		} else {
+			cp.h = kmalloc(cp.hash * sizeof(f), GFP_KERNEL);
+			if (!cp.h)
+				goto errout;
+			memset(cp.h, 0, cp.hash * sizeof(f));
+			balloc = 2;
+		}
+	}
+
+	if (cp.perfect)
+		r = cp.perfect + handle;
+	else
+		r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
+
+	if (r == &new_filter_result) {
+		f = kmalloc(sizeof(*f), GFP_KERNEL);
+		if (!f)
+			goto errout_alloc;
+		memset(f, 0, sizeof(*f));
+ 	}
+
+	if (tb[TCA_TCINDEX_CLASSID-1]) {
+		cr.res.classid = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]);
+		tcf_bind_filter(tp, &cr.res, base);
+ 	}
+
+	tcf_exts_change(tp, &cr.exts, &e);
+
+	tcf_tree_lock(tp);
+	if (old_r && old_r != r)
+		memset(old_r, 0, sizeof(*old_r));
+
+	memcpy(p, &cp, sizeof(cp));
+	memcpy(r, &cr, sizeof(cr));
+
+	if (r == &new_filter_result) {
+		struct tcindex_filter **fp;
+
+		f->key = handle;
+		f->result = new_filter_result;
+		f->next = NULL;
+		for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next)
+			/* nothing */;
+		*fp = f;
+ 	}
+	tcf_tree_unlock(tp);
+
+	return 0;
+
+errout_alloc:
+	if (balloc == 1)
+		kfree(cp.perfect);
+	else if (balloc == 2)
+		kfree(cp.h);
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int
+tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+	       struct rtattr **tca, unsigned long *arg)
+{
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_TCINDEX_MAX];
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
+
+	DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
+	    "p %p,r %p,*arg 0x%lx\n",
+	    tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
+
+	if (!opt)
+		return 0;
+
+	if (rtattr_parse_nested(tb, TCA_TCINDEX_MAX, opt) < 0)
+		return -EINVAL;
+
+	return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE-1]);
+}
+
+
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter *f,*next;
+	int i;
+
+	DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p);
+	if (p->perfect) {
+		for (i = 0; i < p->hash; i++) {
+			if (!p->perfect[i].res.class)
+				continue;
+			if (walker->count >= walker->skip) {
+				if (walker->fn(tp,
+				    (unsigned long) (p->perfect+i), walker)
+				     < 0) {
+					walker->stop = 1;
+					return;
+				}
+			}
+			walker->count++;
+		}
+	}
+	if (!p->h)
+		return;
+	for (i = 0; i < p->hash; i++) {
+		for (f = p->h[i]; f; f = next) {
+			next = f->next;
+			if (walker->count >= walker->skip) {
+				if (walker->fn(tp,(unsigned long) &f->result,
+				    walker) < 0) {
+					walker->stop = 1;
+					return;
+				}
+			}
+			walker->count++;
+		}
+	}
+}
+
+
+static int tcindex_destroy_element(struct tcf_proto *tp,
+    unsigned long arg, struct tcf_walker *walker)
+{
+	return __tcindex_delete(tp, arg, 0);
+}
+
+
+static void tcindex_destroy(struct tcf_proto *tp)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcf_walker walker;
+
+	DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p);
+	walker.count = 0;
+	walker.skip = 0;
+	walker.fn = &tcindex_destroy_element;
+	tcindex_walk(tp,&walker);
+	if (p->perfect)
+		kfree(p->perfect);
+	if (p->h)
+		kfree(p->h);
+	kfree(p);
+	tp->root = NULL;
+}
+
+
+static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
+    struct sk_buff *skb, struct tcmsg *t)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
+	    tp,fh,skb,t,p,r,b);
+	DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h);
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	if (!fh) {
+		t->tcm_handle = ~0; /* whatever ... */
+		RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash);
+		RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask);
+		RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
+		RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
+		    &p->fall_through);
+		rta->rta_len = skb->tail-b;
+	} else {
+		if (p->perfect) {
+			t->tcm_handle = r-p->perfect;
+		} else {
+			struct tcindex_filter *f;
+			int i;
+
+			t->tcm_handle = 0;
+			for (i = 0; !t->tcm_handle && i < p->hash; i++) {
+				for (f = p->h[i]; !t->tcm_handle && f;
+				     f = f->next) {
+					if (&f->result == r)
+						t->tcm_handle = f->key;
+				}
+			}
+		}
+		DPRINTK("handle = %d\n",t->tcm_handle);
+		if (r->res.class)
+			RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
+
+		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
+			goto rtattr_failure;
+		rta->rta_len = skb->tail-b;
+
+		if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
+			goto rtattr_failure;
+	}
+	
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_tcindex_ops = {
+	.next		=	NULL,
+	.kind		=	"tcindex",
+	.classify	=	tcindex_classify,
+	.init		=	tcindex_init,
+	.destroy	=	tcindex_destroy,
+	.get		=	tcindex_get,
+	.put		=	tcindex_put,
+	.change		=	tcindex_change,
+	.delete		=	tcindex_delete,
+	.walk		=	tcindex_walk,
+	.dump		=	tcindex_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_tcindex(void)
+{
+	return register_tcf_proto_ops(&cls_tcindex_ops);
+}
+
+static void __exit exit_tcindex(void) 
+{
+	unregister_tcf_proto_ops(&cls_tcindex_ops);
+}
+
+module_init(init_tcindex)
+module_exit(exit_tcindex)
+MODULE_LICENSE("GPL");
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -0,0 +1,828 @@
+/*
+ * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *	The filters are packed to hash tables of key nodes
+ *	with a set of 32bit key/mask pairs at every node.
+ *	Nodes reference next level hash tables etc.
+ *
+ *	This scheme is the best universal classifier I managed to
+ *	invent; it is not super-fast, but it is not slow (provided you
+ *	program it correctly), and general enough.  And its relative
+ *	speed grows as the number of rules becomes larger.
+ *
+ *	It seems that it represents the best middle point between
+ *	speed and manageability both by human and by machine.
+ *
+ *	It is especially useful for link sharing combined with QoS;
+ *	pure RSVP doesn't need such a general approach and can use
+ *	much simpler (and faster) schemes, sort of cls_rsvp.c.
+ *
+ *	JHS: We should remove the CONFIG_NET_CLS_IND from here
+ *	eventually when the meta match extension is made available
+ *
+ *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct tc_u_knode
+{
+	struct tc_u_knode	*next;
+	u32			handle;
+	struct tc_u_hnode	*ht_up;
+	struct tcf_exts		exts;
+#ifdef CONFIG_NET_CLS_IND
+	char                     indev[IFNAMSIZ];
+#endif
+	u8			fshift;
+	struct tcf_result	res;
+	struct tc_u_hnode	*ht_down;
+#ifdef CONFIG_CLS_U32_PERF
+	struct tc_u32_pcnt	*pf;
+#endif
+#ifdef CONFIG_CLS_U32_MARK
+	struct tc_u32_mark	mark;
+#endif
+	struct tc_u32_sel	sel;
+};
+
+struct tc_u_hnode
+{
+	struct tc_u_hnode	*next;
+	u32			handle;
+	u32			prio;
+	struct tc_u_common	*tp_c;
+	int			refcnt;
+	unsigned		divisor;
+	struct tc_u_knode	*ht[1];
+};
+
+struct tc_u_common
+{
+	struct tc_u_common	*next;
+	struct tc_u_hnode	*hlist;
+	struct Qdisc		*q;
+	int			refcnt;
+	u32			hgenerator;
+};
+
+static struct tcf_ext_map u32_ext_map = {
+	.action = TCA_U32_ACT,
+	.police = TCA_U32_POLICE
+};
+
+static struct tc_u_common *u32_list;
+
+static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift)
+{
+	unsigned h = (key & sel->hmask)>>fshift;
+
+	return h;
+}
+
+static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
+{
+	struct {
+		struct tc_u_knode *knode;
+		u8		  *ptr;
+	} stack[TC_U32_MAXDEPTH];
+
+	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
+	u8 *ptr = skb->nh.raw;
+	struct tc_u_knode *n;
+	int sdepth = 0;
+	int off2 = 0;
+	int sel = 0;
+#ifdef CONFIG_CLS_U32_PERF
+	int j;
+#endif
+	int i, r;
+
+next_ht:
+	n = ht->ht[sel];
+
+next_knode:
+	if (n) {
+		struct tc_u32_key *key = n->sel.keys;
+
+#ifdef CONFIG_CLS_U32_PERF
+		n->pf->rcnt +=1;
+		j = 0;
+#endif
+
+#ifdef CONFIG_CLS_U32_MARK
+		if ((skb->nfmark & n->mark.mask) != n->mark.val) {
+			n = n->next;
+			goto next_knode;
+		} else {
+			n->mark.success++;
+		}
+#endif
+
+		for (i = n->sel.nkeys; i>0; i--, key++) {
+
+			if ((*(u32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
+				n = n->next;
+				goto next_knode;
+			}
+#ifdef CONFIG_CLS_U32_PERF
+			n->pf->kcnts[j] +=1;
+			j++;
+#endif
+		}
+		if (n->ht_down == NULL) {
+check_terminal:
+			if (n->sel.flags&TC_U32_TERMINAL) {
+
+				*res = n->res;
+#ifdef CONFIG_NET_CLS_IND
+				if (!tcf_match_indev(skb, n->indev)) {
+					n = n->next;
+					goto next_knode;
+				}
+#endif
+#ifdef CONFIG_CLS_U32_PERF
+				n->pf->rhit +=1;
+#endif
+				r = tcf_exts_exec(skb, &n->exts, res);
+				if (r < 0) {
+					n = n->next;
+					goto next_knode;
+				}
+
+				return r;
+			}
+			n = n->next;
+			goto next_knode;
+		}
+
+		/* PUSH */
+		if (sdepth >= TC_U32_MAXDEPTH)
+			goto deadloop;
+		stack[sdepth].knode = n;
+		stack[sdepth].ptr = ptr;
+		sdepth++;
+
+		ht = n->ht_down;
+		sel = 0;
+		if (ht->divisor)
+			sel = ht->divisor&u32_hash_fold(*(u32*)(ptr+n->sel.hoff), &n->sel,n->fshift);
+
+		if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
+			goto next_ht;
+
+		if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
+			off2 = n->sel.off + 3;
+			if (n->sel.flags&TC_U32_VAROFFSET)
+				off2 += ntohs(n->sel.offmask & *(u16*)(ptr+n->sel.offoff)) >>n->sel.offshift;
+			off2 &= ~3;
+		}
+		if (n->sel.flags&TC_U32_EAT) {
+			ptr += off2;
+			off2 = 0;
+		}
+
+		if (ptr < skb->tail)
+			goto next_ht;
+	}
+
+	/* POP */
+	if (sdepth--) {
+		n = stack[sdepth].knode;
+		ht = n->ht_up;
+		ptr = stack[sdepth].ptr;
+		goto check_terminal;
+	}
+	return -1;
+
+deadloop:
+	if (net_ratelimit())
+		printk("cls_u32: dead loop\n");
+	return -1;
+}
+
+static __inline__ struct tc_u_hnode *
+u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
+{
+	struct tc_u_hnode *ht;
+
+	for (ht = tp_c->hlist; ht; ht = ht->next)
+		if (ht->handle == handle)
+			break;
+
+	return ht;
+}
+
+static __inline__ struct tc_u_knode *
+u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
+{
+	unsigned sel;
+	struct tc_u_knode *n = NULL;
+
+	sel = TC_U32_HASH(handle);
+	if (sel > ht->divisor)
+		goto out;
+
+	for (n = ht->ht[sel]; n; n = n->next)
+		if (n->handle == handle)
+			break;
+out:
+	return n;
+}
+
+
+static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
+{
+	struct tc_u_hnode *ht;
+	struct tc_u_common *tp_c = tp->data;
+
+	if (TC_U32_HTID(handle) == TC_U32_ROOT)
+		ht = tp->root;
+	else
+		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
+
+	if (!ht)
+		return 0;
+
+	if (TC_U32_KEY(handle) == 0)
+		return (unsigned long)ht;
+
+	return (unsigned long)u32_lookup_key(ht, handle);
+}
+
+static void u32_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static u32 gen_new_htid(struct tc_u_common *tp_c)
+{
+	int i = 0x800;
+
+	do {
+		if (++tp_c->hgenerator == 0x7FF)
+			tp_c->hgenerator = 1;
+	} while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+
+	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
+}
+
+static int u32_init(struct tcf_proto *tp)
+{
+	struct tc_u_hnode *root_ht;
+	struct tc_u_common *tp_c;
+
+	for (tp_c = u32_list; tp_c; tp_c = tp_c->next)
+		if (tp_c->q == tp->q)
+			break;
+
+	root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL);
+	if (root_ht == NULL)
+		return -ENOBUFS;
+
+	memset(root_ht, 0, sizeof(*root_ht));
+	root_ht->divisor = 0;
+	root_ht->refcnt++;
+	root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
+	root_ht->prio = tp->prio;
+
+	if (tp_c == NULL) {
+		tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL);
+		if (tp_c == NULL) {
+			kfree(root_ht);
+			return -ENOBUFS;
+		}
+		memset(tp_c, 0, sizeof(*tp_c));
+		tp_c->q = tp->q;
+		tp_c->next = u32_list;
+		u32_list = tp_c;
+	}
+
+	tp_c->refcnt++;
+	root_ht->next = tp_c->hlist;
+	tp_c->hlist = root_ht;
+	root_ht->tp_c = tp_c;
+
+	tp->root = root_ht;
+	tp->data = tp_c;
+	return 0;
+}
+
+static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
+{
+	tcf_unbind_filter(tp, &n->res);
+	tcf_exts_destroy(tp, &n->exts);
+	if (n->ht_down)
+		n->ht_down->refcnt--;
+#ifdef CONFIG_CLS_U32_PERF
+	if (n && (NULL != n->pf))
+		kfree(n->pf);
+#endif
+	kfree(n);
+	return 0;
+}
+
+static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
+{
+	struct tc_u_knode **kp;
+	struct tc_u_hnode *ht = key->ht_up;
+
+	if (ht) {
+		for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
+			if (*kp == key) {
+				tcf_tree_lock(tp);
+				*kp = key->next;
+				tcf_tree_unlock(tp);
+
+				u32_destroy_key(tp, key);
+				return 0;
+			}
+		}
+	}
+	BUG_TRAP(0);
+	return 0;
+}
+
+static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+{
+	struct tc_u_knode *n;
+	unsigned h;
+
+	for (h=0; h<=ht->divisor; h++) {
+		while ((n = ht->ht[h]) != NULL) {
+			ht->ht[h] = n->next;
+
+			u32_destroy_key(tp, n);
+		}
+	}
+}
+
+static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode **hn;
+
+	BUG_TRAP(!ht->refcnt);
+
+	u32_clear_hnode(tp, ht);
+
+	for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
+		if (*hn == ht) {
+			*hn = ht->next;
+			kfree(ht);
+			return 0;
+		}
+	}
+
+	BUG_TRAP(0);
+	return -ENOENT;
+}
+
+static void u32_destroy(struct tcf_proto *tp)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
+
+	BUG_TRAP(root_ht != NULL);
+
+	if (root_ht && --root_ht->refcnt == 0)
+		u32_destroy_hnode(tp, root_ht);
+
+	if (--tp_c->refcnt == 0) {
+		struct tc_u_hnode *ht;
+		struct tc_u_common **tp_cp;
+
+		for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) {
+			if (*tp_cp == tp_c) {
+				*tp_cp = tp_c->next;
+				break;
+			}
+		}
+
+		for (ht=tp_c->hlist; ht; ht = ht->next)
+			u32_clear_hnode(tp, ht);
+
+		while ((ht = tp_c->hlist) != NULL) {
+			tp_c->hlist = ht->next;
+
+			BUG_TRAP(ht->refcnt == 0);
+
+			kfree(ht);
+		};
+
+		kfree(tp_c);
+	}
+
+	tp->data = NULL;
+}
+
+static int u32_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
+
+	if (ht == NULL)
+		return 0;
+
+	if (TC_U32_KEY(ht->handle))
+		return u32_delete_key(tp, (struct tc_u_knode*)ht);
+
+	if (tp->root == ht)
+		return -EINVAL;
+
+	if (--ht->refcnt == 0)
+		u32_destroy_hnode(tp, ht);
+
+	return 0;
+}
+
+static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
+{
+	struct tc_u_knode *n;
+	unsigned i = 0x7FF;
+
+	for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+		if (i < TC_U32_NODE(n->handle))
+			i = TC_U32_NODE(n->handle);
+	i++;
+
+	return handle|(i>0xFFF ? 0xFFF : i);
+}
+
+static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
+			 struct tc_u_hnode *ht,
+			 struct tc_u_knode *n, struct rtattr **tb,
+			 struct rtattr *est)
+{
+	int err;
+	struct tcf_exts e;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &u32_ext_map);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (tb[TCA_U32_LINK-1]) {
+		u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]);
+		struct tc_u_hnode *ht_down = NULL;
+
+		if (TC_U32_KEY(handle))
+			goto errout;
+
+		if (handle) {
+			ht_down = u32_lookup_ht(ht->tp_c, handle);
+
+			if (ht_down == NULL)
+				goto errout;
+			ht_down->refcnt++;
+		}
+
+		tcf_tree_lock(tp);
+		ht_down = xchg(&n->ht_down, ht_down);
+		tcf_tree_unlock(tp);
+
+		if (ht_down)
+			ht_down->refcnt--;
+	}
+	if (tb[TCA_U32_CLASSID-1]) {
+		n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]);
+		tcf_bind_filter(tp, &n->res, base);
+	}
+
+#ifdef CONFIG_NET_CLS_IND
+	if (tb[TCA_U32_INDEV-1]) {
+		int err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]);
+		if (err < 0)
+			goto errout;
+	}
+#endif
+	tcf_exts_change(tp, &n->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+		      struct rtattr **tca,
+		      unsigned long *arg)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode *ht;
+	struct tc_u_knode *n;
+	struct tc_u32_sel *s;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_U32_MAX];
+	u32 htid;
+	int err;
+
+	if (opt == NULL)
+		return handle ? -EINVAL : 0;
+
+	if (rtattr_parse_nested(tb, TCA_U32_MAX, opt) < 0)
+		return -EINVAL;
+
+	if ((n = (struct tc_u_knode*)*arg) != NULL) {
+		if (TC_U32_KEY(n->handle) == 0)
+			return -EINVAL;
+
+		return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE-1]);
+	}
+
+	if (tb[TCA_U32_DIVISOR-1]) {
+		unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]);
+
+		if (--divisor > 0x100)
+			return -EINVAL;
+		if (TC_U32_KEY(handle))
+			return -EINVAL;
+		if (handle == 0) {
+			handle = gen_new_htid(tp->data);
+			if (handle == 0)
+				return -ENOMEM;
+		}
+		ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+		if (ht == NULL)
+			return -ENOBUFS;
+		memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*));
+		ht->tp_c = tp_c;
+		ht->refcnt = 0;
+		ht->divisor = divisor;
+		ht->handle = handle;
+		ht->prio = tp->prio;
+		ht->next = tp_c->hlist;
+		tp_c->hlist = ht;
+		*arg = (unsigned long)ht;
+		return 0;
+	}
+
+	if (tb[TCA_U32_HASH-1]) {
+		htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]);
+		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
+			ht = tp->root;
+			htid = ht->handle;
+		} else {
+			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
+			if (ht == NULL)
+				return -EINVAL;
+		}
+	} else {
+		ht = tp->root;
+		htid = ht->handle;
+	}
+
+	if (ht->divisor < TC_U32_HASH(htid))
+		return -EINVAL;
+
+	if (handle) {
+		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
+			return -EINVAL;
+		handle = htid | TC_U32_NODE(handle);
+	} else
+		handle = gen_new_kid(ht, htid);
+
+	if (tb[TCA_U32_SEL-1] == 0 ||
+	    RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
+		return -EINVAL;
+
+	s = RTA_DATA(tb[TCA_U32_SEL-1]);
+
+	n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
+	if (n == NULL)
+		return -ENOBUFS;
+
+	memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key));
+#ifdef CONFIG_CLS_U32_PERF
+	n->pf = kmalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
+	if (n->pf == NULL) {
+		kfree(n);
+		return -ENOBUFS;
+	}
+	memset(n->pf, 0, sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64));
+#endif
+
+	memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+	n->ht_up = ht;
+	n->handle = handle;
+{
+	u8 i = 0;
+	u32 mask = s->hmask;
+	if (mask) {
+		while (!(mask & 1)) {
+			i++;
+			mask>>=1;
+		}
+	}
+	n->fshift = i;
+}
+
+#ifdef CONFIG_CLS_U32_MARK
+	if (tb[TCA_U32_MARK-1]) {
+		struct tc_u32_mark *mark;
+
+		if (RTA_PAYLOAD(tb[TCA_U32_MARK-1]) < sizeof(struct tc_u32_mark)) {
+#ifdef CONFIG_CLS_U32_PERF
+			kfree(n->pf);
+#endif
+			kfree(n);
+			return -EINVAL;
+		}
+		mark = RTA_DATA(tb[TCA_U32_MARK-1]);
+		memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
+		n->mark.success = 0;
+	}
+#endif
+
+	err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE-1]);
+	if (err == 0) {
+		struct tc_u_knode **ins;
+		for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
+			if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
+				break;
+
+		n->next = *ins;
+		wmb();
+		*ins = n;
+
+		*arg = (unsigned long)n;
+		return 0;
+	}
+#ifdef CONFIG_CLS_U32_PERF
+	if (n && (NULL != n->pf))
+		kfree(n->pf);
+#endif
+	kfree(n);
+	return err;
+}
+
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode *ht;
+	struct tc_u_knode *n;
+	unsigned h;
+
+	if (arg->stop)
+		return;
+
+	for (ht = tp_c->hlist; ht; ht = ht->next) {
+		if (ht->prio != tp->prio)
+			continue;
+		if (arg->count >= arg->skip) {
+			if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+		}
+		arg->count++;
+		for (h = 0; h <= ht->divisor; h++) {
+			for (n = ht->ht[h]; n; n = n->next) {
+				if (arg->count < arg->skip) {
+					arg->count++;
+					continue;
+				}
+				if (arg->fn(tp, (unsigned long)n, arg) < 0) {
+					arg->stop = 1;
+					return;
+				}
+				arg->count++;
+			}
+		}
+	}
+}
+
+static int u32_dump(struct tcf_proto *tp, unsigned long fh,
+		     struct sk_buff *skb, struct tcmsg *t)
+{
+	struct tc_u_knode *n = (struct tc_u_knode*)fh;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+
+	if (n == NULL)
+		return skb->len;
+
+	t->tcm_handle = n->handle;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (TC_U32_KEY(n->handle) == 0) {
+		struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
+		u32 divisor = ht->divisor+1;
+		RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor);
+	} else {
+		RTA_PUT(skb, TCA_U32_SEL,
+			sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
+			&n->sel);
+		if (n->ht_up) {
+			u32 htid = n->handle & 0xFFFFF000;
+			RTA_PUT(skb, TCA_U32_HASH, 4, &htid);
+		}
+		if (n->res.classid)
+			RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
+		if (n->ht_down)
+			RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
+
+#ifdef CONFIG_CLS_U32_MARK
+		if (n->mark.val || n->mark.mask)
+			RTA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
+#endif
+
+		if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
+			goto rtattr_failure;
+
+#ifdef CONFIG_NET_CLS_IND
+		if(strlen(n->indev))
+			RTA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev);
+#endif
+#ifdef CONFIG_CLS_U32_PERF
+		RTA_PUT(skb, TCA_U32_PCNT, 
+		sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
+			n->pf);
+#endif
+	}
+
+	rta->rta_len = skb->tail - b;
+	if (TC_U32_KEY(n->handle))
+		if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
+			goto rtattr_failure;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_u32_ops = {
+	.next		=	NULL,
+	.kind		=	"u32",
+	.classify	=	u32_classify,
+	.init		=	u32_init,
+	.destroy	=	u32_destroy,
+	.get		=	u32_get,
+	.put		=	u32_put,
+	.change		=	u32_change,
+	.delete		=	u32_delete,
+	.walk		=	u32_walk,
+	.dump		=	u32_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_u32(void)
+{
+	printk("u32 classifier\n");
+#ifdef CONFIG_CLS_U32_PERF
+	printk("    Perfomance counters on\n");
+#endif
+#ifdef CONFIG_NET_CLS_POLICE
+	printk("    OLD policer on \n");
+#endif
+#ifdef CONFIG_NET_CLS_IND
+	printk("    input device check on \n");
+#endif
+#ifdef CONFIG_NET_CLS_ACT
+	printk("    Actions configured \n");
+#endif
+	return register_tcf_proto_ops(&cls_u32_ops);
+}
+
+static void __exit exit_u32(void) 
+{
+	unregister_tcf_proto_ops(&cls_u32_ops);
+}
+
+module_init(init_u32)
+module_exit(exit_u32)
+MODULE_LICENSE("GPL");
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -0,0 +1,101 @@
+/*
+ * net/sched/em_cmp.c	Simple packet data comparison ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_cmp.h>
+#include <net/pkt_cls.h>
+
+static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
+{
+	return unlikely(cmp->flags & TCF_EM_CMP_TRANS);
+}
+
+static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	struct tcf_em_cmp *cmp = (struct tcf_em_cmp *) em->data;
+	unsigned char *ptr = tcf_get_base_ptr(skb, cmp->layer) + cmp->off;
+	u32 val = 0;
+
+	if (!tcf_valid_offset(skb, ptr, cmp->align))
+		return 0;
+
+	switch (cmp->align) {
+		case TCF_EM_ALIGN_U8:
+			val = *ptr;
+			break;
+
+		case TCF_EM_ALIGN_U16:
+			val = *ptr << 8;
+			val |= *(ptr+1);
+
+			if (cmp_needs_transformation(cmp))
+				val = be16_to_cpu(val);
+			break;
+
+		case TCF_EM_ALIGN_U32:
+			/* Worth checking boundries? The branching seems
+			 * to get worse. Visit again. */
+			val = *ptr << 24;
+			val |= *(ptr+1) << 16;
+			val |= *(ptr+2) << 8;
+			val |= *(ptr+3);
+
+			if (cmp_needs_transformation(cmp))
+				val = be32_to_cpu(val);
+			break;
+
+		default:
+			return 0;
+	}
+
+	if (cmp->mask)
+		val &= cmp->mask;
+
+	switch (cmp->opnd) {
+		case TCF_EM_OPND_EQ:
+			return val == cmp->val;
+		case TCF_EM_OPND_LT:
+			return val < cmp->val;
+		case TCF_EM_OPND_GT:
+			return val > cmp->val;
+	}
+
+	return 0;
+}
+
+static struct tcf_ematch_ops em_cmp_ops = {
+	.kind	  = TCF_EM_CMP,
+	.datalen  = sizeof(struct tcf_em_cmp),
+	.match	  = em_cmp_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_cmp_ops.link)
+};
+
+static int __init init_em_cmp(void)
+{
+	return tcf_em_register(&em_cmp_ops);
+}
+
+static void __exit exit_em_cmp(void) 
+{
+	tcf_em_unregister(&em_cmp_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_cmp);
+module_exit(exit_em_cmp);
+
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -0,0 +1,661 @@
+/*
+ * net/sched/em_meta.c	Metadata ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ * 
+ * 	The metadata ematch compares two meta objects where each object
+ * 	represents either a meta value stored in the kernel or a static
+ * 	value provided by userspace. The objects are not provided by
+ * 	userspace itself but rather a definition providing the information
+ * 	to build them. Every object is of a certain type which must be
+ * 	equal to the object it is being compared to.
+ *
+ * 	The definition of a objects conists of the type (meta type), a
+ * 	identifier (meta id) and additional type specific information.
+ * 	The meta id is either TCF_META_TYPE_VALUE for values provided by
+ * 	userspace or a index to the meta operations table consisting of
+ * 	function pointers to type specific meta data collectors returning
+ * 	the value of the requested meta value.
+ *
+ * 	         lvalue                                   rvalue
+ * 	      +-----------+                           +-----------+
+ * 	      | type: INT |                           | type: INT |
+ * 	 def  | id: INDEV |                           | id: VALUE |
+ * 	      | data:     |                           | data: 3   |
+ * 	      +-----------+                           +-----------+
+ * 	            |                                       |
+ * 	            ---> meta_ops[INT][INDEV](...)          |
+ *                            |                            |
+ * 	            -----------                             |
+ * 	            V                                       V
+ * 	      +-----------+                           +-----------+
+ * 	      | type: INT |                           | type: INT |
+ * 	 obj  | id: INDEV |                           | id: VALUE |
+ * 	      | data: 2   |<--data got filled out     | data: 3   |
+ * 	      +-----------+                           +-----------+
+ * 	            |                                         |
+ * 	            --------------> 2  equals 3 <--------------
+ *
+ * 	This is a simplified schema, the complexity varies depending
+ * 	on the meta type. Obviously, the length of the data must also
+ * 	be provided for non-numeric types.
+ *
+ * 	Additionaly, type dependant modifiers such as shift operators
+ * 	or mask may be applied to extend the functionaliy. As of now,
+ * 	the variable length type supports shifting the byte string to
+ * 	the right, eating up any number of octets and thus supporting
+ * 	wildcard interface name comparisons such as "ppp%" matching
+ * 	ppp0..9.
+ *
+ * 	NOTE: Certain meta values depend on other subsystems and are
+ * 	      only available if that subsytem is enabled in the kernel.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/tc_ematch/tc_em_meta.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/pkt_cls.h>
+
+struct meta_obj
+{
+	unsigned long		value;
+	unsigned int		len;
+};
+
+struct meta_value
+{
+	struct tcf_meta_val	hdr;
+	unsigned long		val;
+	unsigned int		len;
+};
+
+struct meta_match
+{
+	struct meta_value	lvalue;
+	struct meta_value	rvalue;
+};
+
+static inline int meta_id(struct meta_value *v)
+{
+	return TCF_META_ID(v->hdr.kind);
+}
+
+static inline int meta_type(struct meta_value *v)
+{
+	return TCF_META_TYPE(v->hdr.kind);
+}
+
+#define META_COLLECTOR(FUNC) static void meta_##FUNC(struct sk_buff *skb, \
+	struct tcf_pkt_info *info, struct meta_value *v, \
+	struct meta_obj *dst, int *err)
+
+/**************************************************************************
+ * System status & misc
+ **************************************************************************/
+
+META_COLLECTOR(int_random)
+{
+	get_random_bytes(&dst->value, sizeof(dst->value));
+}
+
+static inline unsigned long fixed_loadavg(int load)
+{
+	int rnd_load = load + (FIXED_1/200);
+	int rnd_frac = ((rnd_load & (FIXED_1-1)) * 100) >> FSHIFT;
+
+	return ((rnd_load >> FSHIFT) * 100) + rnd_frac;
+}
+
+META_COLLECTOR(int_loadavg_0)
+{
+	dst->value = fixed_loadavg(avenrun[0]);
+}
+
+META_COLLECTOR(int_loadavg_1)
+{
+	dst->value = fixed_loadavg(avenrun[1]);
+}
+
+META_COLLECTOR(int_loadavg_2)
+{
+	dst->value = fixed_loadavg(avenrun[2]);
+}
+
+/**************************************************************************
+ * Device names & indices
+ **************************************************************************/
+
+static inline int int_dev(struct net_device *dev, struct meta_obj *dst)
+{
+	if (unlikely(dev == NULL))
+		return -1;
+
+	dst->value = dev->ifindex;
+	return 0;
+}
+
+static inline int var_dev(struct net_device *dev, struct meta_obj *dst)
+{
+	if (unlikely(dev == NULL))
+		return -1;
+
+	dst->value = (unsigned long) dev->name;
+	dst->len = strlen(dev->name);
+	return 0;
+}
+
+META_COLLECTOR(int_dev)
+{
+	*err = int_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(var_dev)
+{
+	*err = var_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(int_indev)
+{
+	*err = int_dev(skb->input_dev, dst);
+}
+
+META_COLLECTOR(var_indev)
+{
+	*err = var_dev(skb->input_dev, dst);
+}
+
+META_COLLECTOR(int_realdev)
+{
+	*err = int_dev(skb->real_dev, dst);
+}
+
+META_COLLECTOR(var_realdev)
+{
+	*err = var_dev(skb->real_dev, dst);
+}
+
+/**************************************************************************
+ * skb attributes
+ **************************************************************************/
+
+META_COLLECTOR(int_priority)
+{
+	dst->value = skb->priority;
+}
+
+META_COLLECTOR(int_protocol)
+{
+	/* Let userspace take care of the byte ordering */
+	dst->value = skb->protocol;
+}
+
+META_COLLECTOR(int_security)
+{
+	dst->value = skb->security;
+}
+
+META_COLLECTOR(int_pkttype)
+{
+	dst->value = skb->pkt_type;
+}
+
+META_COLLECTOR(int_pktlen)
+{
+	dst->value = skb->len;
+}
+
+META_COLLECTOR(int_datalen)
+{
+	dst->value = skb->data_len;
+}
+
+META_COLLECTOR(int_maclen)
+{
+	dst->value = skb->mac_len;
+}
+
+/**************************************************************************
+ * Netfilter
+ **************************************************************************/
+
+#ifdef CONFIG_NETFILTER
+META_COLLECTOR(int_nfmark)
+{
+	dst->value = skb->nfmark;
+}
+#endif
+
+/**************************************************************************
+ * Traffic Control
+ **************************************************************************/
+
+META_COLLECTOR(int_tcindex)
+{
+	dst->value = skb->tc_index;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+META_COLLECTOR(int_tcverd)
+{
+	dst->value = skb->tc_verd;
+}
+
+META_COLLECTOR(int_tcclassid)
+{
+	dst->value = skb->tc_classid;
+}
+#endif
+
+/**************************************************************************
+ * Routing
+ **************************************************************************/
+
+#ifdef CONFIG_NET_CLS_ROUTE
+META_COLLECTOR(int_rtclassid)
+{
+	if (unlikely(skb->dst == NULL))
+		*err = -1;
+	else
+		dst->value = skb->dst->tclassid;
+}
+#endif
+
+META_COLLECTOR(int_rtiif)
+{
+	if (unlikely(skb->dst == NULL))
+		*err = -1;
+	else
+		dst->value = ((struct rtable*) skb->dst)->fl.iif;
+}
+
+/**************************************************************************
+ * Meta value collectors assignment table
+ **************************************************************************/
+
+struct meta_ops
+{
+	void		(*get)(struct sk_buff *, struct tcf_pkt_info *,
+			       struct meta_value *, struct meta_obj *, int *);
+};
+
+/* Meta value operations table listing all meta value collectors and
+ * assigns them to a type and meta id. */
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+	[TCF_META_TYPE_VAR] = {
+		[TCF_META_ID_DEV]	= { .get = meta_var_dev },
+		[TCF_META_ID_INDEV]	= { .get = meta_var_indev },
+		[TCF_META_ID_REALDEV]	= { .get = meta_var_realdev }
+	},
+	[TCF_META_TYPE_INT] = {
+		[TCF_META_ID_RANDOM]	= { .get = meta_int_random },
+		[TCF_META_ID_LOADAVG_0]	= { .get = meta_int_loadavg_0 },
+		[TCF_META_ID_LOADAVG_1]	= { .get = meta_int_loadavg_1 },
+		[TCF_META_ID_LOADAVG_2]	= { .get = meta_int_loadavg_2 },
+		[TCF_META_ID_DEV]	= { .get = meta_int_dev },
+		[TCF_META_ID_INDEV]	= { .get = meta_int_indev },
+		[TCF_META_ID_REALDEV]	= { .get = meta_int_realdev },
+		[TCF_META_ID_PRIORITY]	= { .get = meta_int_priority },
+		[TCF_META_ID_PROTOCOL]	= { .get = meta_int_protocol },
+		[TCF_META_ID_SECURITY]	= { .get = meta_int_security },
+		[TCF_META_ID_PKTTYPE]	= { .get = meta_int_pkttype },
+		[TCF_META_ID_PKTLEN]	= { .get = meta_int_pktlen },
+		[TCF_META_ID_DATALEN]	= { .get = meta_int_datalen },
+		[TCF_META_ID_MACLEN]	= { .get = meta_int_maclen },
+#ifdef CONFIG_NETFILTER
+		[TCF_META_ID_NFMARK]	= { .get = meta_int_nfmark },
+#endif
+		[TCF_META_ID_TCINDEX]	= { .get = meta_int_tcindex },
+#ifdef CONFIG_NET_CLS_ACT
+		[TCF_META_ID_TCVERDICT]	= { .get = meta_int_tcverd },
+		[TCF_META_ID_TCCLASSID]	= { .get = meta_int_tcclassid },
+#endif
+#ifdef CONFIG_NET_CLS_ROUTE
+		[TCF_META_ID_RTCLASSID]	= { .get = meta_int_rtclassid },
+#endif
+		[TCF_META_ID_RTIIF]	= { .get = meta_int_rtiif }
+	}
+};
+
+static inline struct meta_ops * meta_ops(struct meta_value *val)
+{
+	return &__meta_ops[meta_type(val)][meta_id(val)];
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_VAR
+ **************************************************************************/
+
+static int meta_var_compare(struct meta_obj *a, struct meta_obj *b)
+{
+	int r = a->len - b->len;
+
+	if (r == 0)
+		r = memcmp((void *) a->value, (void *) b->value, a->len);
+
+	return r;
+}
+
+static int meta_var_change(struct meta_value *dst, struct rtattr *rta)
+{
+	int len = RTA_PAYLOAD(rta);
+
+	dst->val = (unsigned long) kmalloc(len, GFP_KERNEL);
+	if (dst->val == 0UL)
+		return -ENOMEM;
+	memcpy((void *) dst->val, RTA_DATA(rta), len);
+	dst->len = len;
+	return 0;
+}
+
+static void meta_var_destroy(struct meta_value *v)
+{
+	if (v->val)
+		kfree((void *) v->val);
+}
+
+static void meta_var_apply_extras(struct meta_value *v,
+				  struct meta_obj *dst)
+{
+	int shift = v->hdr.shift;
+
+	if (shift && shift < dst->len)
+		dst->len -= shift;
+}
+
+static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
+{
+	if (v->val && v->len)
+		RTA_PUT(skb, tlv, v->len, (void *) v->val);
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_INT
+ **************************************************************************/
+
+static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
+{
+	/* Let gcc optimize it, the unlikely is not really based on
+	 * some numbers but jump free code for mismatches seems
+	 * more logical. */
+	if (unlikely(a == b))
+		return 0;
+	else if (a < b)
+		return -1;
+	else
+		return 1;
+}
+
+static int meta_int_change(struct meta_value *dst, struct rtattr *rta)
+{
+	if (RTA_PAYLOAD(rta) >= sizeof(unsigned long)) {
+		dst->val = *(unsigned long *) RTA_DATA(rta);
+		dst->len = sizeof(unsigned long);
+	} else if (RTA_PAYLOAD(rta) == sizeof(u32)) {
+		dst->val = *(u32 *) RTA_DATA(rta);
+		dst->len = sizeof(u32);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+static void meta_int_apply_extras(struct meta_value *v,
+				  struct meta_obj *dst)
+{
+	if (v->hdr.shift)
+		dst->value >>= v->hdr.shift;
+
+	if (v->val)
+		dst->value &= v->val;
+}
+
+static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
+{
+	if (v->len == sizeof(unsigned long))
+		RTA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
+	else if (v->len == sizeof(u32)) {
+		u32 d = v->val;
+		RTA_PUT(skb, tlv, sizeof(d), &d);
+	}
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+/**************************************************************************
+ * Type specific operations table
+ **************************************************************************/
+
+struct meta_type_ops
+{
+	void	(*destroy)(struct meta_value *);
+	int	(*compare)(struct meta_obj *, struct meta_obj *);
+	int	(*change)(struct meta_value *, struct rtattr *);
+	void	(*apply_extras)(struct meta_value *, struct meta_obj *);
+	int	(*dump)(struct sk_buff *, struct meta_value *, int);
+};
+
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+	[TCF_META_TYPE_VAR] = {
+		.destroy = meta_var_destroy,
+		.compare = meta_var_compare,
+		.change = meta_var_change,
+		.apply_extras = meta_var_apply_extras,
+		.dump = meta_var_dump
+	},
+	[TCF_META_TYPE_INT] = {
+		.compare = meta_int_compare,
+		.change = meta_int_change,
+		.apply_extras = meta_int_apply_extras,
+		.dump = meta_int_dump
+	}
+};
+
+static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+{
+	return &__meta_type_ops[meta_type(v)];
+}
+
+/**************************************************************************
+ * Core
+ **************************************************************************/
+
+static inline int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info, 
+			   struct meta_value *v, struct meta_obj *dst)
+{
+	int err = 0;
+
+	if (meta_id(v) == TCF_META_ID_VALUE) {
+		dst->value = v->val;
+		dst->len = v->len;
+		return 0;
+	}
+
+	meta_ops(v)->get(skb, info, v, dst, &err);
+	if (err < 0)
+		return err;
+
+	if (meta_type_ops(v)->apply_extras)
+	    meta_type_ops(v)->apply_extras(v, dst);
+
+	return 0;
+}
+
+static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
+			 struct tcf_pkt_info *info)
+{
+	int r;
+	struct meta_match *meta = (struct meta_match *) m->data;
+	struct meta_obj l_value, r_value;
+
+	if (meta_get(skb, info, &meta->lvalue, &l_value) < 0 ||
+	    meta_get(skb, info, &meta->rvalue, &r_value) < 0)
+		return 0;
+
+	r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
+
+	switch (meta->lvalue.hdr.op) {
+		case TCF_EM_OPND_EQ:
+			return !r;
+		case TCF_EM_OPND_LT:
+			return r < 0;
+		case TCF_EM_OPND_GT:
+			return r > 0;
+	}
+
+	return 0;
+}
+
+static inline void meta_delete(struct meta_match *meta)
+{
+	struct meta_type_ops *ops = meta_type_ops(&meta->lvalue);
+
+	if (ops && ops->destroy) {
+		ops->destroy(&meta->lvalue);
+		ops->destroy(&meta->rvalue);
+	}
+
+	kfree(meta);
+}
+
+static inline int meta_change_data(struct meta_value *dst, struct rtattr *rta)
+{
+	if (rta) {
+		if (RTA_PAYLOAD(rta) == 0)
+			return -EINVAL;
+
+		return meta_type_ops(dst)->change(dst, rta);
+	}
+
+	return 0;
+}
+
+static inline int meta_is_supported(struct meta_value *val)
+{
+	return (!meta_id(val) || meta_ops(val)->get);
+}
+
+static int em_meta_change(struct tcf_proto *tp, void *data, int len,
+			  struct tcf_ematch *m)
+{
+	int err = -EINVAL;
+	struct rtattr *tb[TCA_EM_META_MAX];
+	struct tcf_meta_hdr *hdr;
+	struct meta_match *meta = NULL;
+	
+	if (rtattr_parse(tb, TCA_EM_META_MAX, data, len) < 0)
+		goto errout;
+
+	if (tb[TCA_EM_META_HDR-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_EM_META_HDR-1]) < sizeof(*hdr))
+		goto errout;
+	hdr = RTA_DATA(tb[TCA_EM_META_HDR-1]);
+
+	if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) ||
+	    TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX ||
+	    TCF_META_ID(hdr->left.kind) > TCF_META_ID_MAX ||
+	    TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX)
+		goto errout;
+
+	meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+	if (meta == NULL)
+		goto errout;
+	memset(meta, 0, sizeof(*meta));
+
+	memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
+	memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
+
+	if (!meta_is_supported(&meta->lvalue) ||
+	    !meta_is_supported(&meta->rvalue)) {
+		err = -EOPNOTSUPP;
+		goto errout;
+	}
+
+	if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE-1]) < 0 ||
+	    meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE-1]) < 0)
+		goto errout;
+
+	m->datalen = sizeof(*meta);
+	m->data = (unsigned long) meta;
+
+	err = 0;
+errout:
+	if (err && meta)
+		meta_delete(meta);
+	return err;
+}
+
+static void em_meta_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+{
+	if (m)
+		meta_delete((struct meta_match *) m->data);
+}
+
+static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+	struct meta_match *meta = (struct meta_match *) em->data;
+	struct tcf_meta_hdr hdr;
+	struct meta_type_ops *ops;
+
+	memset(&hdr, 0, sizeof(hdr));
+	memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left));
+	memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right));
+
+	RTA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
+
+	ops = meta_type_ops(&meta->lvalue);
+	if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 ||
+	    ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0)
+		goto rtattr_failure;
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}		
+
+static struct tcf_ematch_ops em_meta_ops = {
+	.kind	  = TCF_EM_META,
+	.change	  = em_meta_change,
+	.match	  = em_meta_match,
+	.destroy  = em_meta_destroy,
+	.dump	  = em_meta_dump,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_meta_ops.link)
+};
+
+static int __init init_em_meta(void)
+{
+	return tcf_em_register(&em_meta_ops);
+}
+
+static void __exit exit_em_meta(void) 
+{
+	tcf_em_unregister(&em_meta_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_meta);
+module_exit(exit_em_meta);
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -0,0 +1,82 @@
+/*
+ * net/sched/em_nbyte.c	N-Byte ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_nbyte.h>
+#include <net/pkt_cls.h>
+
+struct nbyte_data
+{
+	struct tcf_em_nbyte	hdr;
+	char			pattern[0];
+};
+	
+static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len,
+			   struct tcf_ematch *em)
+{
+	struct tcf_em_nbyte *nbyte = data;
+
+	if (data_len < sizeof(*nbyte) ||
+	    data_len < (sizeof(*nbyte) + nbyte->len))
+		return -EINVAL;
+
+	em->datalen = sizeof(*nbyte) + nbyte->len;
+	em->data = (unsigned long) kmalloc(em->datalen, GFP_KERNEL);
+	if (em->data == 0UL)
+		return -ENOBUFS;
+
+	memcpy((void *) em->data, data, em->datalen);
+
+	return 0;
+}
+
+static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
+			  struct tcf_pkt_info *info)
+{
+	struct nbyte_data *nbyte = (struct nbyte_data *) em->data;
+	unsigned char *ptr = tcf_get_base_ptr(skb, nbyte->hdr.layer);
+
+	ptr += nbyte->hdr.off;
+
+	if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
+		return 0;
+
+	return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
+}
+
+static struct tcf_ematch_ops em_nbyte_ops = {
+	.kind	  = TCF_EM_NBYTE,
+	.change	  = em_nbyte_change,
+	.match	  = em_nbyte_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_nbyte_ops.link)
+};
+
+static int __init init_em_nbyte(void)
+{
+	return tcf_em_register(&em_nbyte_ops);
+}
+
+static void __exit exit_em_nbyte(void) 
+{
+	tcf_em_unregister(&em_nbyte_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_nbyte);
+module_exit(exit_em_nbyte);
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -0,0 +1,63 @@
+/*
+ * net/sched/em_u32.c	U32 Ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Based on net/sched/cls_u32.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+
+static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
+	unsigned char *ptr = skb->nh.raw;
+	
+	if (info) {
+		if (info->ptr)
+			ptr = info->ptr;
+		ptr += (info->nexthdr & key->offmask);
+	}
+
+	ptr += key->off;
+
+	if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
+		return 0;
+	
+	return !(((*(u32*) ptr)  ^ key->val) & key->mask);
+}
+
+static struct tcf_ematch_ops em_u32_ops = {
+	.kind	  = TCF_EM_U32,
+	.datalen  = sizeof(struct tc_u32_key),
+	.match	  = em_u32_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_u32_ops.link)
+};
+
+static int __init init_em_u32(void)
+{
+	return tcf_em_register(&em_u32_ops);
+}
+
+static void __exit exit_em_u32(void) 
+{
+	tcf_em_unregister(&em_u32_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_u32);
+module_exit(exit_em_u32);
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -0,0 +1,524 @@
+/*
+ * net/sched/ematch.c		Extended Match API
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ *
+ * An extended match (ematch) is a small classification tool not worth
+ * writing a full classifier for. Ematches can be interconnected to form
+ * a logic expression and get attached to classifiers to extend their
+ * functionatlity.
+ *
+ * The userspace part transforms the logic expressions into an array
+ * consisting of multiple sequences of interconnected ematches separated
+ * by markers. Precedence is implemented by a special ematch kind
+ * referencing a sequence beyond the marker of the current sequence
+ * causing the current position in the sequence to be pushed onto a stack
+ * to allow the current position to be overwritten by the position referenced
+ * in the special ematch. Matching continues in the new sequence until a
+ * marker is reached causing the position to be restored from the stack.
+ *
+ * Example:
+ *          A AND (B1 OR B2) AND C AND D
+ *
+ *              ------->-PUSH-------
+ *    -->--    /         -->--      \   -->--
+ *   /     \  /         /     \      \ /     \
+ * +-------+-------+-------+-------+-------+--------+
+ * | A AND | B AND | C AND | D END | B1 OR | B2 END |
+ * +-------+-------+-------+-------+-------+--------+
+ *                    \                      /
+ *                     --------<-POP---------
+ *
+ * where B is a virtual ematch referencing to sequence starting with B1.
+ * 
+ * ==========================================================================
+ *
+ * How to write an ematch in 60 seconds
+ * ------------------------------------
+ * 
+ *   1) Provide a matcher function:
+ *      static int my_match(struct sk_buff *skb, struct tcf_ematch *m,
+ *                          struct tcf_pkt_info *info)
+ *      {
+ *      	struct mydata *d = (struct mydata *) m->data;
+ *
+ *      	if (...matching goes here...)
+ *      		return 1;
+ *      	else
+ *      		return 0;
+ *      }
+ *
+ *   2) Fill out a struct tcf_ematch_ops:
+ *      static struct tcf_ematch_ops my_ops = {
+ *      	.kind = unique id,
+ *      	.datalen = sizeof(struct mydata),
+ *      	.match = my_match,
+ *      	.owner = THIS_MODULE,
+ *      };
+ *
+ *   3) Register/Unregister your ematch:
+ *      static int __init init_my_ematch(void)
+ *      {
+ *      	return tcf_em_register(&my_ops);
+ *      }
+ *
+ *      static void __exit exit_my_ematch(void)
+ *      {
+ *      	return tcf_em_unregister(&my_ops);
+ *      }
+ *
+ *      module_init(init_my_ematch);
+ *      module_exit(exit_my_ematch);
+ *
+ *   4) By now you should have two more seconds left, barely enough to
+ *      open up a beer to watch the compilation going.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <config/net/ematch/stack.h>
+
+static LIST_HEAD(ematch_ops);
+static DEFINE_RWLOCK(ematch_mod_lock);
+
+static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+{
+	struct tcf_ematch_ops *e = NULL;
+
+	read_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link) {
+		if (kind == e->kind) {
+			if (!try_module_get(e->owner))
+				e = NULL;
+			read_unlock(&ematch_mod_lock);
+			return e;
+		}
+	}
+	read_unlock(&ematch_mod_lock);
+
+	return NULL;
+}
+
+/**
+ * tcf_em_register - register an extended match
+ * 
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their presence.
+ * The given @ops must have kind set to a unique identifier and the
+ * callback match() must be implemented. All other callbacks are optional
+ * and a fallback implementation is used instead.
+ *
+ * Returns -EEXISTS if an ematch of the same kind has already registered.
+ */
+int tcf_em_register(struct tcf_ematch_ops *ops)
+{
+	int err = -EEXIST;
+	struct tcf_ematch_ops *e;
+
+	if (ops->match == NULL)
+		return -EINVAL;
+
+	write_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link)
+		if (ops->kind == e->kind)
+			goto errout;
+
+	list_add_tail(&ops->link, &ematch_ops);
+	err = 0;
+errout:
+	write_unlock(&ematch_mod_lock);
+	return err;
+}
+
+/**
+ * tcf_em_unregister - unregster and extended match
+ *
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their disappearance
+ * for examples when the module gets unloaded. The @ops parameter must be
+ * the same as the one used for registration.
+ *
+ * Returns -ENOENT if no matching ematch was found.
+ */
+int tcf_em_unregister(struct tcf_ematch_ops *ops)
+{
+	int err = 0;
+	struct tcf_ematch_ops *e;
+
+	write_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link) {
+		if (e == ops) {
+			list_del(&e->link);
+			goto out;
+		}
+	}
+
+	err = -ENOENT;
+out:
+	write_unlock(&ematch_mod_lock);
+	return err;
+}
+
+static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
+						   int index)
+{
+	return &tree->matches[index];
+}
+
+
+static int tcf_em_validate(struct tcf_proto *tp,
+			   struct tcf_ematch_tree_hdr *tree_hdr,
+			   struct tcf_ematch *em, struct rtattr *rta, int idx)
+{
+	int err = -EINVAL;
+	struct tcf_ematch_hdr *em_hdr = RTA_DATA(rta);
+	int data_len = RTA_PAYLOAD(rta) - sizeof(*em_hdr);
+	void *data = (void *) em_hdr + sizeof(*em_hdr);
+
+	if (!TCF_EM_REL_VALID(em_hdr->flags))
+		goto errout;
+
+	if (em_hdr->kind == TCF_EM_CONTAINER) {
+		/* Special ematch called "container", carries an index
+		 * referencing an external ematch sequence. */
+		u32 ref;
+
+		if (data_len < sizeof(ref))
+			goto errout;
+		ref = *(u32 *) data;
+
+		if (ref >= tree_hdr->nmatches)
+			goto errout;
+
+		/* We do not allow backward jumps to avoid loops and jumps
+		 * to our own position are of course illegal. */
+		if (ref <= idx)
+			goto errout;
+
+		
+		em->data = ref;
+	} else {
+		/* Note: This lookup will increase the module refcnt
+		 * of the ematch module referenced. In case of a failure,
+		 * a destroy function is called by the underlying layer
+		 * which automatically releases the reference again, therefore
+		 * the module MUST not be given back under any circumstances
+		 * here. Be aware, the destroy function assumes that the
+		 * module is held if the ops field is non zero. */
+		em->ops = tcf_em_lookup(em_hdr->kind);
+
+		if (em->ops == NULL) {
+			err = -ENOENT;
+			goto errout;
+		}
+
+		/* ematch module provides expected length of data, so we
+		 * can do a basic sanity check. */
+		if (em->ops->datalen && data_len < em->ops->datalen)
+			goto errout;
+
+		if (em->ops->change) {
+			err = em->ops->change(tp, data, data_len, em);
+			if (err < 0)
+				goto errout;
+		} else if (data_len > 0) {
+			/* ematch module doesn't provide an own change
+			 * procedure and expects us to allocate and copy
+			 * the ematch data.
+			 *
+			 * TCF_EM_SIMPLE may be specified stating that the
+			 * data only consists of a u32 integer and the module
+			 * does not expected a memory reference but rather
+			 * the value carried. */
+			if (em_hdr->flags & TCF_EM_SIMPLE) {
+				if (data_len < sizeof(u32))
+					goto errout;
+				em->data = *(u32 *) data;
+			} else {
+				void *v = kmalloc(data_len, GFP_KERNEL);
+				if (v == NULL) {
+					err = -ENOBUFS;
+					goto errout;
+				}
+				memcpy(v, data, data_len);
+				em->data = (unsigned long) v;
+			}
+		}
+	}
+
+	em->matchid = em_hdr->matchid;
+	em->flags = em_hdr->flags;
+	em->datalen = data_len;
+
+	err = 0;
+errout:
+	return err;
+}
+
+/**
+ * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
+ *
+ * @tp: classifier kind handle
+ * @rta: ematch tree configuration TLV
+ * @tree: destination ematch tree variable to store the resulting
+ *        ematch tree.
+ *
+ * This function validates the given configuration TLV @rta and builds an
+ * ematch tree in @tree. The resulting tree must later be copied into
+ * the private classifier data using tcf_em_tree_change(). You MUST NOT
+ * provide the ematch tree variable of the private classifier data directly,
+ * the changes would not be locked properly.
+ *
+ * Returns a negative error code if the configuration TLV contains errors.
+ */
+int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
+			 struct tcf_ematch_tree *tree)
+{
+	int idx, list_len, matches_len, err = -EINVAL;
+	struct rtattr *tb[TCA_EMATCH_TREE_MAX];
+	struct rtattr *rt_match, *rt_hdr, *rt_list;
+	struct tcf_ematch_tree_hdr *tree_hdr;
+	struct tcf_ematch *em;
+
+	if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0)
+		goto errout;
+
+	rt_hdr = tb[TCA_EMATCH_TREE_HDR-1];
+	rt_list = tb[TCA_EMATCH_TREE_LIST-1];
+
+	if (rt_hdr == NULL || rt_list == NULL)
+		goto errout;
+
+	if (RTA_PAYLOAD(rt_hdr) < sizeof(*tree_hdr) ||
+	    RTA_PAYLOAD(rt_list) < sizeof(*rt_match))
+		goto errout;
+
+	tree_hdr = RTA_DATA(rt_hdr);
+	memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
+
+	rt_match = RTA_DATA(rt_list);
+	list_len = RTA_PAYLOAD(rt_list);
+	matches_len = tree_hdr->nmatches * sizeof(*em);
+
+	tree->matches = kmalloc(matches_len, GFP_KERNEL);
+	if (tree->matches == NULL)
+		goto errout;
+	memset(tree->matches, 0, matches_len);
+
+	/* We do not use rtattr_parse_nested here because the maximum
+	 * number of attributes is unknown. This saves us the allocation
+	 * for a tb buffer which would serve no purpose at all.
+	 * 
+	 * The array of rt attributes is parsed in the order as they are
+	 * provided, their type must be incremental from 1 to n. Even
+	 * if it does not serve any real purpose, a failure of sticking
+	 * to this policy will result in parsing failure. */
+	for (idx = 0; RTA_OK(rt_match, list_len); idx++) {
+		err = -EINVAL;
+
+		if (rt_match->rta_type != (idx + 1))
+			goto errout_abort;
+
+		if (idx >= tree_hdr->nmatches)
+			goto errout_abort;
+
+		if (RTA_PAYLOAD(rt_match) < sizeof(struct tcf_ematch_hdr))
+			goto errout_abort;
+
+		em = tcf_em_get_match(tree, idx);
+
+		err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);
+		if (err < 0)
+			goto errout_abort;
+
+		rt_match = RTA_NEXT(rt_match, list_len);
+	}
+
+	/* Check if the number of matches provided by userspace actually
+	 * complies with the array of matches. The number was used for
+	 * the validation of references and a mismatch could lead to
+	 * undefined references during the matching process. */
+	if (idx != tree_hdr->nmatches) {
+		err = -EINVAL;
+		goto errout_abort;
+	}
+
+	err = 0;
+errout:
+	return err;
+
+errout_abort:
+	tcf_em_tree_destroy(tp, tree);
+	return err;
+}
+
+/**
+ * tcf_em_tree_destroy - destroy an ematch tree
+ *
+ * @tp: classifier kind handle
+ * @tree: ematch tree to be deleted
+ *
+ * This functions destroys an ematch tree previously created by
+ * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
+ * the ematch tree is not in use before calling this function.
+ */
+void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
+{
+	int i;
+
+	if (tree->matches == NULL)
+		return;
+
+	for (i = 0; i < tree->hdr.nmatches; i++) {
+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
+
+		if (em->ops) {
+			if (em->ops->destroy)
+				em->ops->destroy(tp, em);
+			else if (!tcf_em_is_simple(em) && em->data)
+				kfree((void *) em->data);
+			module_put(em->ops->owner);
+		}
+	}
+	
+	tree->hdr.nmatches = 0;
+	kfree(tree->matches);
+}
+
+/**
+ * tcf_em_tree_dump - dump ematch tree into a rtnl message
+ *
+ * @skb: skb holding the rtnl message
+ * @t: ematch tree to be dumped
+ * @tlv: TLV type to be used to encapsulate the tree
+ *
+ * This function dumps a ematch tree into a rtnl message. It is valid to
+ * call this function while the ematch tree is in use.
+ *
+ * Returns -1 if the skb tailroom is insufficient.
+ */
+int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
+{
+	int i;
+	struct rtattr * top_start = (struct rtattr*) skb->tail;
+	struct rtattr * list_start;
+
+	RTA_PUT(skb, tlv, 0, NULL);
+	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
+
+	list_start = (struct rtattr *) skb->tail;
+	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
+
+	for (i = 0; i < tree->hdr.nmatches; i++) {
+		struct rtattr *match_start = (struct rtattr*) skb->tail;
+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
+		struct tcf_ematch_hdr em_hdr = {
+			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
+			.matchid = em->matchid,
+			.flags = em->flags
+		};
+
+		RTA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+
+		if (em->ops && em->ops->dump) {
+			if (em->ops->dump(skb, em) < 0)
+				goto rtattr_failure;
+		} else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
+			u32 u = em->data;
+			RTA_PUT_NOHDR(skb, sizeof(u), &u);
+		} else if (em->datalen > 0)
+			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
+
+		match_start->rta_len = skb->tail - (u8*) match_start;
+	}
+
+	list_start->rta_len = skb->tail - (u8 *) list_start;
+	top_start->rta_len = skb->tail - (u8 *) top_start;
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
+			       struct tcf_pkt_info *info)
+{
+	int r = em->ops->match(skb, em, info);
+	return tcf_em_is_inverted(em) ? !r : r;
+}
+
+/* Do not use this function directly, use tcf_em_tree_match instead */
+int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,
+			struct tcf_pkt_info *info)
+{
+	int stackp = 0, match_idx = 0, res = 0;
+	struct tcf_ematch *cur_match;
+	int stack[CONFIG_NET_EMATCH_STACK];
+
+proceed:
+	while (match_idx < tree->hdr.nmatches) {
+		cur_match = tcf_em_get_match(tree, match_idx);
+
+		if (tcf_em_is_container(cur_match)) {
+			if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))
+				goto stack_overflow;
+
+			stack[stackp++] = match_idx;
+			match_idx = cur_match->data;
+			goto proceed;
+		}
+
+		res = tcf_em_match(skb, cur_match, info);
+
+		if (tcf_em_early_end(cur_match, res))
+			break;
+
+		match_idx++;
+	}
+
+pop_stack:
+	if (stackp > 0) {
+		match_idx = stack[--stackp];
+		cur_match = tcf_em_get_match(tree, match_idx);
+
+		if (tcf_em_early_end(cur_match, res))
+			goto pop_stack;
+		else {
+			match_idx++;
+			goto proceed;
+		}
+	}
+
+	return res;
+
+stack_overflow:
+	if (net_ratelimit())
+		printk("Local stack overflow, increase NET_EMATCH_STACK\n");
+	return -1;
+}
+
+EXPORT_SYMBOL(tcf_em_register);
+EXPORT_SYMBOL(tcf_em_unregister);
+EXPORT_SYMBOL(tcf_em_tree_validate);
+EXPORT_SYMBOL(tcf_em_tree_destroy);
+EXPORT_SYMBOL(tcf_em_tree_dump);
+EXPORT_SYMBOL(__tcf_em_tree_match);
--- a/net/sched/estimator.c
+++ b/net/sched/estimator.c
@@ -0,0 +1,197 @@
+/*
+ * net/sched/estimator.c	Simple rate estimator.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/*
+   This code is NOT intended to be used for statistics collection,
+   its purpose is to provide a base for statistical multiplexing
+   for controlled load service.
+   If you need only statistics, run a user level daemon which
+   periodically reads byte counters.
+
+   Unfortunately, rate estimation is not a very easy task.
+   F.e. I did not find a simple way to estimate the current peak rate
+   and even failed to formulate the problem 8)8)
+
+   So I preferred not to built an estimator into the scheduler,
+   but run this task separately.
+   Ideally, it should be kernel thread(s), but for now it runs
+   from timers, which puts apparent top bounds on the number of rated
+   flows, has minimal overhead on small, but is enough
+   to handle controlled load service, sets of aggregates.
+
+   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
+
+   avrate = avrate*(1-W) + rate*W
+
+   where W is chosen as negative power of 2: W = 2^(-ewma_log)
+
+   The resulting time constant is:
+
+   T = A/(-ln(1-W))
+
+
+   NOTES.
+
+   * The stored value for avbps is scaled by 2^5, so that maximal
+     rate is ~1Gbit, avpps is scaled by 2^10.
+
+   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
+     for HZ=100 and HZ=1024 8)), maximal interval
+     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
+     are too expensive, longer ones can be implemented
+     at user level painlessly.
+ */
+
+#define EST_MAX_INTERVAL	5
+
+struct qdisc_estimator
+{
+	struct qdisc_estimator	*next;
+	struct tc_stats		*stats;
+	spinlock_t		*stats_lock;
+	unsigned		interval;
+	int			ewma_log;
+	u64			last_bytes;
+	u32			last_packets;
+	u32			avpps;
+	u32			avbps;
+};
+
+struct qdisc_estimator_head
+{
+	struct timer_list	timer;
+	struct qdisc_estimator	*list;
+};
+
+static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
+
+/* Estimator array lock */
+static DEFINE_RWLOCK(est_lock);
+
+static void est_timer(unsigned long arg)
+{
+	int idx = (int)arg;
+	struct qdisc_estimator *e;
+
+	read_lock(&est_lock);
+	for (e = elist[idx].list; e; e = e->next) {
+		struct tc_stats *st = e->stats;
+		u64 nbytes;
+		u32 npackets;
+		u32 rate;
+
+		spin_lock(e->stats_lock);
+		nbytes = st->bytes;
+		npackets = st->packets;
+		rate = (nbytes - e->last_bytes)<<(7 - idx);
+		e->last_bytes = nbytes;
+		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
+		st->bps = (e->avbps+0xF)>>5;
+
+		rate = (npackets - e->last_packets)<<(12 - idx);
+		e->last_packets = npackets;
+		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
+		e->stats->pps = (e->avpps+0x1FF)>>10;
+		spin_unlock(e->stats_lock);
+	}
+
+	mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+	read_unlock(&est_lock);
+}
+
+int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt)
+{
+	struct qdisc_estimator *est;
+	struct tc_estimator *parm = RTA_DATA(opt);
+
+	if (RTA_PAYLOAD(opt) < sizeof(*parm))
+		return -EINVAL;
+
+	if (parm->interval < -2 || parm->interval > 3)
+		return -EINVAL;
+
+	est = kmalloc(sizeof(*est), GFP_KERNEL);
+	if (est == NULL)
+		return -ENOBUFS;
+
+	memset(est, 0, sizeof(*est));
+	est->interval = parm->interval + 2;
+	est->stats = stats;
+	est->stats_lock = stats_lock;
+	est->ewma_log = parm->ewma_log;
+	est->last_bytes = stats->bytes;
+	est->avbps = stats->bps<<5;
+	est->last_packets = stats->packets;
+	est->avpps = stats->pps<<10;
+
+	est->next = elist[est->interval].list;
+	if (est->next == NULL) {
+		init_timer(&elist[est->interval].timer);
+		elist[est->interval].timer.data = est->interval;
+		elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
+		elist[est->interval].timer.function = est_timer;
+		add_timer(&elist[est->interval].timer);
+	}
+	write_lock_bh(&est_lock);
+	elist[est->interval].list = est;
+	write_unlock_bh(&est_lock);
+	return 0;
+}
+
+void qdisc_kill_estimator(struct tc_stats *stats)
+{
+	int idx;
+	struct qdisc_estimator *est, **pest;
+
+	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
+		int killed = 0;
+		pest = &elist[idx].list;
+		while ((est=*pest) != NULL) {
+			if (est->stats != stats) {
+				pest = &est->next;
+				continue;
+			}
+
+			write_lock_bh(&est_lock);
+			*pest = est->next;
+			write_unlock_bh(&est_lock);
+
+			kfree(est);
+			killed++;
+		}
+		if (killed && elist[idx].list == NULL)
+			del_timer(&elist[idx].timer);
+	}
+}
+
+EXPORT_SYMBOL(qdisc_kill_estimator);
+EXPORT_SYMBOL(qdisc_new_estimator);
--- a/net/sched/gact.c
+++ b/net/sched/gact.c
@@ -0,0 +1,231 @@
+/*
+ * net/sched/gact.c	Generic actions
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * copyright 	Jamal Hadi Salim (2002-4)
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_gact.h>
+#include <net/tc_act/tc_gact.h>
+
+/* use generic hash table */
+#define MY_TAB_SIZE	16
+#define MY_TAB_MASK	15
+
+static u32 idx_gen;
+static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE];
+static DEFINE_RWLOCK(gact_lock);
+
+/* ovewrride the defaults */
+#define tcf_st		tcf_gact
+#define tc_st		tc_gact
+#define tcf_t_lock	gact_lock
+#define tcf_ht		tcf_gact_ht
+
+#define CONFIG_NET_ACT_INIT 1
+#include <net/pkt_act.h>
+
+#ifdef CONFIG_GACT_PROB
+static int gact_net_rand(struct tcf_gact *p)
+{
+	if (net_random()%p->pval)
+		return p->action;
+	return p->paction;
+}
+
+static int gact_determ(struct tcf_gact *p)
+{
+	if (p->bstats.packets%p->pval)
+		return p->action;
+	return p->paction;
+}
+
+typedef int (*g_rand)(struct tcf_gact *p);
+static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
+#endif
+
+static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
+                         struct tc_action *a, int ovr, int bind)
+{
+	struct rtattr *tb[TCA_GACT_MAX];
+	struct tc_gact *parm;
+	struct tcf_gact *p;
+	int ret = 0;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_GACT_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_GACT_PARMS - 1]) < sizeof(*parm))
+		return -EINVAL;
+	parm = RTA_DATA(tb[TCA_GACT_PARMS - 1]);
+
+	if (tb[TCA_GACT_PROB-1] != NULL)
+#ifdef CONFIG_GACT_PROB
+		if (RTA_PAYLOAD(tb[TCA_GACT_PROB-1]) < sizeof(struct tc_gact_p))
+			return -EINVAL;
+#else
+		return -EOPNOTSUPP;
+#endif
+
+	p = tcf_hash_check(parm->index, a, ovr, bind);
+	if (p == NULL) {
+		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
+		if (p == NULL)
+			return -ENOMEM;
+		ret = ACT_P_CREATED;
+	} else {
+		if (!ovr) {
+			tcf_hash_release(p, bind);
+			return -EEXIST;
+		}
+	}
+
+	spin_lock_bh(&p->lock);
+	p->action = parm->action;
+#ifdef CONFIG_GACT_PROB
+	if (tb[TCA_GACT_PROB-1] != NULL) {
+		struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]);
+		p->paction = p_parm->paction;
+		p->pval    = p_parm->pval;
+		p->ptype   = p_parm->ptype;
+	}
+#endif
+	spin_unlock_bh(&p->lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(p);
+	return ret;
+}
+
+static int
+tcf_gact_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_gact *p = PRIV(a, gact);
+
+	if (p != NULL)
+		return tcf_hash_release(p, bind);
+	return 0;
+}
+
+static int
+tcf_gact(struct sk_buff **pskb, struct tc_action *a)
+{
+	struct tcf_gact *p = PRIV(a, gact);
+	struct sk_buff *skb = *pskb;
+	int action = TC_ACT_SHOT;
+
+	spin_lock(&p->lock);
+#ifdef CONFIG_GACT_PROB
+	if (p->ptype && gact_rand[p->ptype] != NULL)
+		action = gact_rand[p->ptype](p);
+	else
+		action = p->action;
+#else
+	action = p->action;
+#endif
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+	if (action == TC_ACT_SHOT)
+		p->qstats.drops++;
+	p->tm.lastuse = jiffies;
+	spin_unlock(&p->lock);
+
+	return action;
+}
+
+static int
+tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	unsigned char *b = skb->tail;
+	struct tc_gact opt;
+	struct tcf_gact *p = PRIV(a, gact);
+	struct tcf_t t;
+
+	opt.index = p->index;
+	opt.refcnt = p->refcnt - ref;
+	opt.bindcnt = p->bindcnt - bind;
+	opt.action = p->action;
+	RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
+#ifdef CONFIG_GACT_PROB
+	if (p->ptype) {
+		struct tc_gact_p p_opt;
+		p_opt.paction = p->paction;
+		p_opt.pval = p->pval;
+		p_opt.ptype = p->ptype;
+		RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
+	}
+#endif
+	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
+	t.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
+	return skb->len;
+
+      rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tc_action_ops act_gact_ops = {
+	.kind		=	"gact",
+	.type		=	TCA_ACT_GACT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_gact,
+	.dump		=	tcf_gact_dump,
+	.cleanup	=	tcf_gact_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_gact_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Generic Classifier actions");
+MODULE_LICENSE("GPL");
+
+static int __init
+gact_init_module(void)
+{
+#ifdef CONFIG_GACT_PROB
+	printk("GACT probability on\n");
+#else
+	printk("GACT probability NOT on\n");
+#endif
+	return tcf_register_action(&act_gact_ops);
+}
+
+static void __exit
+gact_cleanup_module(void)
+{
+	tcf_unregister_action(&act_gact_ops);
+}
+
+module_init(gact_init_module);
+module_exit(gact_cleanup_module);
--- a/net/sched/ipt.c
+++ b/net/sched/ipt.c
@@ -0,0 +1,326 @@
+/*
+ * net/sched/ipt.c	iptables target interface
+ *
+ *TODO: Add other tables. For now we only support the ipv4 table targets
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Copyright:	Jamal Hadi Salim (2002-4)
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/kmod.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_ipt.h>
+#include <net/tc_act/tc_ipt.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+/* use generic hash table */
+#define MY_TAB_SIZE     16
+#define MY_TAB_MASK     15
+
+static u32 idx_gen;
+static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE];
+/* ipt hash table lock */
+static DEFINE_RWLOCK(ipt_lock);
+
+/* ovewrride the defaults */
+#define tcf_st		tcf_ipt
+#define tcf_t_lock	ipt_lock
+#define tcf_ht		tcf_ipt_ht
+
+#define CONFIG_NET_ACT_INIT
+#include <net/pkt_act.h>
+
+static int
+ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
+{
+	struct ipt_target *target;
+	int ret = 0;
+
+	target = ipt_find_target(t->u.user.name, t->u.user.revision);
+	if (!target)
+		return -ENOENT;
+
+	DPRINTK("ipt_init_target: found %s\n", target->name);
+	t->u.kernel.target = target;
+
+	if (t->u.kernel.target->checkentry
+	    && !t->u.kernel.target->checkentry(table, NULL, t->data,
+					       t->u.target_size - sizeof(*t),
+					       hook)) {
+		DPRINTK("ipt_init_target: check failed for `%s'.\n",
+			t->u.kernel.target->name);
+		module_put(t->u.kernel.target->me);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void
+ipt_destroy_target(struct ipt_entry_target *t)
+{
+	if (t->u.kernel.target->destroy)
+		t->u.kernel.target->destroy(t->data,
+		                            t->u.target_size - sizeof(*t));
+        module_put(t->u.kernel.target->me);
+}
+
+static int
+tcf_ipt_release(struct tcf_ipt *p, int bind)
+{
+	int ret = 0;
+	if (p) {
+		if (bind)
+			p->bindcnt--;
+		p->refcnt--;
+		if (p->bindcnt <= 0 && p->refcnt <= 0) {
+			ipt_destroy_target(p->t);
+			kfree(p->tname);
+			kfree(p->t);
+			tcf_hash_destroy(p);
+			ret = ACT_P_DELETED;
+		}
+	}
+	return ret;
+}
+
+static int
+tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
+             int ovr, int bind)
+{
+	struct rtattr *tb[TCA_IPT_MAX];
+	struct tcf_ipt *p;
+	struct ipt_entry_target *td, *t;
+	char *tname;
+	int ret = 0, err;
+	u32 hook = 0;
+	u32 index = 0;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_IPT_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_IPT_HOOK-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_IPT_HOOK-1]) < sizeof(u32))
+		return -EINVAL;
+	if (tb[TCA_IPT_TARG-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < sizeof(*t))
+		return -EINVAL;
+	td = (struct ipt_entry_target *)RTA_DATA(tb[TCA_IPT_TARG-1]);
+	if (RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < td->u.target_size)
+		return -EINVAL;
+
+	if (tb[TCA_IPT_INDEX-1] != NULL &&
+	    RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
+		index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
+
+	p = tcf_hash_check(index, a, ovr, bind);
+	if (p == NULL) {
+		p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind);
+		if (p == NULL)
+			return -ENOMEM;
+		ret = ACT_P_CREATED;
+	} else {
+		if (!ovr) {
+			tcf_ipt_release(p, bind);
+			return -EEXIST;
+		}
+	}
+
+	hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);
+
+	err = -ENOMEM;
+	tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
+	if (tname == NULL)
+		goto err1;
+	if (tb[TCA_IPT_TABLE - 1] == NULL ||
+	    rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
+		strcpy(tname, "mangle");
+
+	t = kmalloc(td->u.target_size, GFP_KERNEL);
+	if (t == NULL)
+		goto err2;
+	memcpy(t, td, td->u.target_size);
+
+	if ((err = ipt_init_target(t, tname, hook)) < 0)
+		goto err3;
+
+	spin_lock_bh(&p->lock);
+	if (ret != ACT_P_CREATED) {
+		ipt_destroy_target(p->t);
+		kfree(p->tname);
+		kfree(p->t);
+	}
+	p->tname = tname;
+	p->t     = t;
+	p->hook  = hook;
+	spin_unlock_bh(&p->lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(p);
+	return ret;
+
+err3:
+	kfree(t);
+err2:
+	kfree(tname);
+err1:
+	kfree(p);
+	return err;
+}
+
+static int
+tcf_ipt_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_ipt *p = PRIV(a, ipt);
+	return tcf_ipt_release(p, bind);
+}
+
+static int
+tcf_ipt(struct sk_buff **pskb, struct tc_action *a)
+{
+	int ret = 0, result = 0;
+	struct tcf_ipt *p = PRIV(a, ipt);
+	struct sk_buff *skb = *pskb;
+
+	if (skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			return TC_ACT_UNSPEC;
+	}
+
+	spin_lock(&p->lock);
+
+	p->tm.lastuse = jiffies;
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+
+	/* yes, we have to worry about both in and out dev
+	 worry later - danger - this API seems to have changed
+	 from earlier kernels */
+
+	ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL,
+					    p->hook, p->t->data, NULL);
+	switch (ret) {
+	case NF_ACCEPT:
+		result = TC_ACT_OK;
+		break;
+	case NF_DROP:
+		result = TC_ACT_SHOT;
+		p->qstats.drops++;
+		break;
+	case IPT_CONTINUE:
+		result = TC_ACT_PIPE;
+		break;
+	default:
+		if (net_ratelimit())
+			printk("Bogus netfilter code %d assume ACCEPT\n", ret);
+		result = TC_POLICE_OK;
+		break;
+	}
+	spin_unlock(&p->lock);
+	return result;
+
+}
+
+static int
+tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	struct ipt_entry_target *t;
+	struct tcf_t tm;
+	struct tc_cnt c;
+	unsigned char *b = skb->tail;
+	struct tcf_ipt *p = PRIV(a, ipt);
+
+	/* for simple targets kernel size == user size
+	** user name = target name
+	** for foolproof you need to not assume this
+	*/
+
+	t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC);
+	if (t == NULL)
+		goto rtattr_failure;
+
+	c.bindcnt = p->bindcnt - bind;
+	c.refcnt = p->refcnt - ref;
+	memcpy(t, p->t, p->t->u.user.target_size);
+	strcpy(t->u.user.name, p->t->u.kernel.target->name);
+
+	DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname,
+		strlen(p->tname));
+	DPRINTK("\tdump target name %s size %d size user %d "
+	        "data[0] %x data[1] %x\n", p->t->u.kernel.target->name,
+	        p->t->u.target_size, p->t->u.user.target_size,
+	        p->t->data[0], p->t->data[1]);
+	RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t);
+	RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index);
+	RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook);
+	RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
+	RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname);
+	tm.install = jiffies_to_clock_t(jiffies - p->tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
+	tm.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
+	kfree(t);
+	return skb->len;
+
+      rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	kfree(t);
+	return -1;
+}
+
+static struct tc_action_ops act_ipt_ops = {
+	.kind		=	"ipt",
+	.type		=	TCA_ACT_IPT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_ipt,
+	.dump		=	tcf_ipt_dump,
+	.cleanup	=	tcf_ipt_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_ipt_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Iptables target actions");
+MODULE_LICENSE("GPL");
+
+static int __init
+ipt_init_module(void)
+{
+	return tcf_register_action(&act_ipt_ops);
+}
+
+static void __exit
+ipt_cleanup_module(void)
+{
+	tcf_unregister_action(&act_ipt_ops);
+}
+
+module_init(ipt_init_module);
+module_exit(ipt_cleanup_module);
--- a/net/sched/mirred.c
+++ b/net/sched/mirred.c
@@ -0,0 +1,276 @@
+/*
+ * net/sched/mirred.c	packet mirroring and redirect actions
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Jamal Hadi Salim (2002-4)
+ *
+ * TODO: Add ingress support (and socket redirect support)
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+
+
+/* use generic hash table */
+#define MY_TAB_SIZE     8
+#define MY_TAB_MASK     (MY_TAB_SIZE - 1)
+static u32 idx_gen;
+static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE];
+static DEFINE_RWLOCK(mirred_lock);
+
+/* ovewrride the defaults */
+#define tcf_st		tcf_mirred
+#define tc_st		tc_mirred
+#define tcf_t_lock	mirred_lock
+#define tcf_ht		tcf_mirred_ht
+
+#define CONFIG_NET_ACT_INIT 1
+#include <net/pkt_act.h>
+
+static inline int
+tcf_mirred_release(struct tcf_mirred *p, int bind)
+{
+	if (p) {
+		if (bind)
+			p->bindcnt--;
+		p->refcnt--;
+		if(!p->bindcnt && p->refcnt <= 0) {
+			dev_put(p->dev);
+			tcf_hash_destroy(p);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int
+tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
+                int ovr, int bind)
+{
+	struct rtattr *tb[TCA_MIRRED_MAX];
+	struct tc_mirred *parm;
+	struct tcf_mirred *p;
+	struct net_device *dev = NULL;
+	int ret = 0;
+	int ok_push = 0;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_MIRRED_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_MIRRED_PARMS-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_MIRRED_PARMS-1]) < sizeof(*parm))
+		return -EINVAL;
+	parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]);
+
+	if (parm->ifindex) {
+		dev = __dev_get_by_index(parm->ifindex);
+		if (dev == NULL)
+			return -ENODEV;
+		switch (dev->type) {
+			case ARPHRD_TUNNEL:
+			case ARPHRD_TUNNEL6:
+			case ARPHRD_SIT:
+			case ARPHRD_IPGRE:
+			case ARPHRD_VOID:
+			case ARPHRD_NONE:
+				ok_push = 0;
+				break;
+			default:
+				ok_push = 1;
+				break;
+		}
+	}
+
+	p = tcf_hash_check(parm->index, a, ovr, bind);
+	if (p == NULL) {
+		if (!parm->ifindex)
+			return -EINVAL;
+		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
+		if (p == NULL)
+			return -ENOMEM;
+		ret = ACT_P_CREATED;
+	} else {
+		if (!ovr) {
+			tcf_mirred_release(p, bind);
+			return -EEXIST;
+		}
+	}
+
+	spin_lock_bh(&p->lock);
+	p->action = parm->action;
+	p->eaction = parm->eaction;
+	if (parm->ifindex) {
+		p->ifindex = parm->ifindex;
+		if (ret != ACT_P_CREATED)
+			dev_put(p->dev);
+		p->dev = dev;
+		dev_hold(dev);
+		p->ok_push = ok_push;
+	}
+	spin_unlock_bh(&p->lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(p);
+
+	DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s "
+	        "ifindex %d\n", parm->index, parm->action, parm->eaction,
+	        dev->name, parm->ifindex);
+	return ret;
+}
+
+static int
+tcf_mirred_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_mirred *p = PRIV(a, mirred);
+
+	if (p != NULL)
+		return tcf_mirred_release(p, bind);
+	return 0;
+}
+
+static int
+tcf_mirred(struct sk_buff **pskb, struct tc_action *a)
+{
+	struct tcf_mirred *p = PRIV(a, mirred);
+	struct net_device *dev;
+	struct sk_buff *skb2 = NULL;
+	struct sk_buff *skb = *pskb;
+	u32 at = G_TC_AT(skb->tc_verd);
+
+	spin_lock(&p->lock);
+
+	dev = p->dev;
+	p->tm.lastuse = jiffies;
+
+	if (!(dev->flags&IFF_UP) ) {
+		if (net_ratelimit())
+			printk("mirred to Houston: device %s is gone!\n",
+			       dev->name);
+bad_mirred:
+		if (skb2 != NULL)
+			kfree_skb(skb2);
+		p->qstats.overlimits++;
+		p->bstats.bytes += skb->len;
+		p->bstats.packets++;
+		spin_unlock(&p->lock);
+		/* should we be asking for packet to be dropped?
+		 * may make sense for redirect case only
+		*/
+		return TC_ACT_SHOT;
+	}
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (skb2 == NULL)
+		goto bad_mirred;
+	if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) {
+		if (net_ratelimit())
+			printk("tcf_mirred unknown action %d\n", p->eaction);
+		goto bad_mirred;
+	}
+
+	p->bstats.bytes += skb2->len;
+	p->bstats.packets++;
+	if (!(at & AT_EGRESS))
+		if (p->ok_push)
+			skb_push(skb2, skb2->dev->hard_header_len);
+
+	/* mirror is always swallowed */
+	if (p->eaction != TCA_EGRESS_MIRROR)
+		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
+
+	skb2->dev = dev;
+	skb2->input_dev = skb->dev;
+	dev_queue_xmit(skb2);
+	spin_unlock(&p->lock);
+	return p->action;
+}
+
+static int
+tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	unsigned char *b = skb->tail;
+	struct tc_mirred opt;
+	struct tcf_mirred *p = PRIV(a, mirred);
+	struct tcf_t t;
+
+	opt.index = p->index;
+	opt.action = p->action;
+	opt.refcnt = p->refcnt - ref;
+	opt.bindcnt = p->bindcnt - bind;
+	opt.eaction = p->eaction;
+	opt.ifindex = p->ifindex;
+	DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n",
+	         p->index, p->action, p->eaction, p->ifindex);
+	RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
+	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
+	t.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
+	return skb->len;
+
+      rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tc_action_ops act_mirred_ops = {
+	.kind		=	"mirred",
+	.type		=	TCA_ACT_MIRRED,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_mirred,
+	.dump		=	tcf_mirred_dump,
+	.cleanup	=	tcf_mirred_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_mirred_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002)");
+MODULE_DESCRIPTION("Device Mirror/redirect actions");
+MODULE_LICENSE("GPL");
+
+static int __init
+mirred_init_module(void)
+{
+	printk("Mirror/redirect action on\n");
+	return tcf_register_action(&act_mirred_ops);
+}
+
+static void __exit
+mirred_cleanup_module(void)
+{
+	tcf_unregister_action(&act_mirred_ops);
+}
+
+module_init(mirred_init_module);
+module_exit(mirred_cleanup_module);
--- a/net/sched/pedit.c
+++ b/net/sched/pedit.c
@@ -0,0 +1,288 @@
+/*
+ * net/sched/pedit.c	Generic packet editor
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Jamal Hadi Salim (2002-4)
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_pedit.h>
+
+
+#define PEDIT_DEB 1
+
+/* use generic hash table */
+#define MY_TAB_SIZE     16
+#define MY_TAB_MASK     15
+static u32 idx_gen;
+static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE];
+static DEFINE_RWLOCK(pedit_lock);
+
+#define tcf_st		tcf_pedit
+#define tc_st		tc_pedit
+#define tcf_t_lock	pedit_lock
+#define tcf_ht		tcf_pedit_ht
+
+#define CONFIG_NET_ACT_INIT 1
+#include <net/pkt_act.h>
+
+static int
+tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
+               int ovr, int bind)
+{
+	struct rtattr *tb[TCA_PEDIT_MAX];
+	struct tc_pedit *parm;
+	int ret = 0;
+	struct tcf_pedit *p;
+	struct tc_pedit_key *keys = NULL;
+	int ksize;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_PEDIT_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_PEDIT_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm))
+		return -EINVAL;
+	parm = RTA_DATA(tb[TCA_PEDIT_PARMS-1]);
+	ksize = parm->nkeys * sizeof(struct tc_pedit_key);
+	if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize)
+		return -EINVAL;
+
+	p = tcf_hash_check(parm->index, a, ovr, bind);
+	if (p == NULL) {
+		if (!parm->nkeys)
+			return -EINVAL;
+		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
+		if (p == NULL)
+			return -ENOMEM;
+		keys = kmalloc(ksize, GFP_KERNEL);
+		if (keys == NULL) {
+			kfree(p);
+			return -ENOMEM;
+		}
+		ret = ACT_P_CREATED;
+	} else {
+		if (!ovr) {
+			tcf_hash_release(p, bind);
+			return -EEXIST;
+		}
+		if (p->nkeys && p->nkeys != parm->nkeys) {
+			keys = kmalloc(ksize, GFP_KERNEL);
+			if (keys == NULL)
+				return -ENOMEM;
+		}
+	}
+
+	spin_lock_bh(&p->lock);
+	p->flags = parm->flags;
+	p->action = parm->action;
+	if (keys) {
+		kfree(p->keys);
+		p->keys = keys;
+		p->nkeys = parm->nkeys;
+	}
+	memcpy(p->keys, parm->keys, ksize);
+	spin_unlock_bh(&p->lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(p);
+	return ret;
+}
+
+static int
+tcf_pedit_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_pedit *p = PRIV(a, pedit);
+
+	if (p != NULL) {
+		struct tc_pedit_key *keys = p->keys;
+		if (tcf_hash_release(p, bind)) {
+			kfree(keys);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int
+tcf_pedit(struct sk_buff **pskb, struct tc_action *a)
+{
+	struct tcf_pedit *p = PRIV(a, pedit);
+	struct sk_buff *skb = *pskb;
+	int i, munged = 0;
+	u8 *pptr;
+
+	if (!(skb->tc_verd & TC_OK2MUNGE)) {
+		/* should we set skb->cloned? */
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+			return p->action;
+		}
+	}
+
+	pptr = skb->nh.raw;
+
+	spin_lock(&p->lock);
+
+	p->tm.lastuse = jiffies;
+
+	if (p->nkeys > 0) {
+		struct tc_pedit_key *tkey = p->keys;
+
+		for (i = p->nkeys; i > 0; i--, tkey++) {
+			u32 *ptr;
+			int offset = tkey->off;
+
+			if (tkey->offmask) {
+				if (skb->len > tkey->at) {
+					 char *j = pptr + tkey->at;
+					 offset += ((*j & tkey->offmask) >> 
+					           tkey->shift);
+				} else {
+					goto bad;
+				}
+			}
+
+			if (offset % 4) {
+				printk("offset must be on 32 bit boundaries\n");
+				goto bad;
+			}
+			if (skb->len < 0 || (offset > 0 && offset > skb->len)) {
+				printk("offset %d cant exceed pkt length %d\n",
+				       offset, skb->len);
+				goto bad;
+			}
+
+			ptr = (u32 *)(pptr+offset);
+			/* just do it, baby */
+			*ptr = ((*ptr & tkey->mask) ^ tkey->val);
+			munged++;
+		}
+		
+		if (munged)
+			skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
+		goto done;
+	} else {
+		printk("pedit BUG: index %d\n",p->index);
+	}
+
+bad:
+	p->qstats.overlimits++;
+done:
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+	spin_unlock(&p->lock);
+	return p->action;
+}
+
+static int
+tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
+{
+	unsigned char *b = skb->tail;
+	struct tc_pedit *opt;
+	struct tcf_pedit *p = PRIV(a, pedit);
+	struct tcf_t t;
+	int s; 
+		
+	s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key);
+
+	/* netlink spinlocks held above us - must use ATOMIC */
+	opt = kmalloc(s, GFP_ATOMIC);
+	if (opt == NULL)
+		return -ENOBUFS;
+	memset(opt, 0, s);
+
+	memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key));
+	opt->index = p->index;
+	opt->nkeys = p->nkeys;
+	opt->flags = p->flags;
+	opt->action = p->action;
+	opt->refcnt = p->refcnt - ref;
+	opt->bindcnt = p->bindcnt - bind;
+
+
+#ifdef PEDIT_DEB
+	{                
+		/* Debug - get rid of later */
+		int i;
+		struct tc_pedit_key *key = opt->keys;
+
+		for (i=0; i<opt->nkeys; i++, key++) {
+			printk( "\n key #%d",i);
+			printk( "  at %d: val %08x mask %08x",
+			(unsigned int)key->off,
+			(unsigned int)key->val,
+			(unsigned int)key->mask);
+		}
+	}
+#endif
+
+	RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
+	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
+	t.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static
+struct tc_action_ops act_pedit_ops = {
+	.kind		=	"pedit",
+	.type		=	TCA_ACT_PEDIT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_pedit,
+	.dump		=	tcf_pedit_dump,
+	.cleanup	=	tcf_pedit_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_pedit_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Generic Packet Editor actions");
+MODULE_LICENSE("GPL");
+
+static int __init
+pedit_init_module(void)
+{
+	return tcf_register_action(&act_pedit_ops);
+}
+
+static void __exit
+pedit_cleanup_module(void)
+{
+	tcf_unregister_action(&act_pedit_ops);
+}
+
+module_init(pedit_init_module);
+module_exit(pedit_cleanup_module);
+
--- a/net/sched/police.c
+++ b/net/sched/police.c
@@ -0,0 +1,612 @@
+/*
+ * net/sched/police.c	Input police filter.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * 		J Hadi Salim (action changes)
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+
+#define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
+#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
+#define PRIV(a) ((struct tcf_police *) (a)->priv)
+
+/* use generic hash table */
+#define MY_TAB_SIZE     16
+#define MY_TAB_MASK     15
+static u32 idx_gen;
+static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
+/* Policer hash table lock */
+static DEFINE_RWLOCK(police_lock);
+
+/* Each policer is serialized by its individual spinlock */
+
+static __inline__ unsigned tcf_police_hash(u32 index)
+{
+	return index&0xF;
+}
+
+static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
+{
+	struct tcf_police *p;
+
+	read_lock(&police_lock);
+	for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
+		if (p->index == index)
+			break;
+	}
+	read_unlock(&police_lock);
+	return p;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+                              int type, struct tc_action *a)
+{
+	struct tcf_police *p;
+	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	struct rtattr *r;
+
+	read_lock(&police_lock);
+
+	s_i = cb->args[0];
+
+	for (i = 0; i < MY_TAB_SIZE; i++) {
+		p = tcf_police_ht[tcf_police_hash(i)];
+
+		for (; p; p = p->next) {
+			index++;
+			if (index < s_i)
+				continue;
+			a->priv = p;
+			a->order = index;
+			r = (struct rtattr*) skb->tail;
+			RTA_PUT(skb, a->order, 0, NULL);
+			if (type == RTM_DELACTION)
+				err = tcf_action_dump_1(skb, a, 0, 1);
+			else
+				err = tcf_action_dump_1(skb, a, 0, 0);
+			if (err < 0) {
+				index--;
+				skb_trim(skb, (u8*)r - skb->data);
+				goto done;
+			}
+			r->rta_len = skb->tail - (u8*)r;
+			n_i++;
+		}
+	}
+done:
+	read_unlock(&police_lock);
+	if (n_i)
+		cb->args[0] += n_i;
+	return n_i;
+
+rtattr_failure:
+	skb_trim(skb, (u8*)r - skb->data);
+	goto done;
+}
+
+static inline int
+tcf_hash_search(struct tc_action *a, u32 index)
+{
+	struct tcf_police *p = tcf_police_lookup(index);
+
+	if (p != NULL) {
+		a->priv = p;
+		return 1;
+	} else {
+		return 0;
+	}
+}
+#endif
+
+static inline u32 tcf_police_new_index(void)
+{
+	do {
+		if (++idx_gen == 0)
+			idx_gen = 1;
+	} while (tcf_police_lookup(idx_gen));
+
+	return idx_gen;
+}
+
+void tcf_police_destroy(struct tcf_police *p)
+{
+	unsigned h = tcf_police_hash(p->index);
+	struct tcf_police **p1p;
+	
+	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
+		if (*p1p == p) {
+			write_lock_bh(&police_lock);
+			*p1p = p->next;
+			write_unlock_bh(&police_lock);
+#ifdef CONFIG_NET_ESTIMATOR
+			gen_kill_estimator(&p->bstats, &p->rate_est);
+#endif
+			if (p->R_tab)
+				qdisc_put_rtab(p->R_tab);
+			if (p->P_tab)
+				qdisc_put_rtab(p->P_tab);
+			kfree(p);
+			return;
+		}
+	}
+	BUG_TRAP(0);
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
+                                 struct tc_action *a, int ovr, int bind)
+{
+	unsigned h;
+	int ret = 0, err;
+	struct rtattr *tb[TCA_POLICE_MAX];
+	struct tc_police *parm;
+	struct tcf_police *p;
+	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_POLICE_TBF-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
+		return -EINVAL;
+	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
+
+	if (tb[TCA_POLICE_RESULT-1] != NULL &&
+	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+		return -EINVAL;
+	if (tb[TCA_POLICE_RESULT-1] != NULL &&
+	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+		return -EINVAL;
+
+	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
+		a->priv = p;
+		if (bind) {
+			p->bindcnt += 1;
+			p->refcnt += 1;
+		}
+		if (ovr)
+			goto override;
+		return ret;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return -ENOMEM;
+	memset(p, 0, sizeof(*p));
+
+	ret = ACT_P_CREATED;
+	p->refcnt = 1;
+	spin_lock_init(&p->lock);
+	p->stats_lock = &p->lock;
+	if (bind)
+		p->bindcnt = 1;
+override:
+	if (parm->rate.rate) {
+		err = -ENOMEM;
+		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+		if (R_tab == NULL)
+			goto failure;
+		if (parm->peakrate.rate) {
+			P_tab = qdisc_get_rtab(&parm->peakrate,
+					       tb[TCA_POLICE_PEAKRATE-1]);
+			if (p->P_tab == NULL) {
+				qdisc_put_rtab(R_tab);
+				goto failure;
+			}
+		}
+	}
+	/* No failure allowed after this point */
+	spin_lock_bh(&p->lock);
+	if (R_tab != NULL) {
+		qdisc_put_rtab(p->R_tab);
+		p->R_tab = R_tab;
+	}
+	if (P_tab != NULL) {
+		qdisc_put_rtab(p->P_tab);
+		p->P_tab = P_tab;
+	}
+
+	if (tb[TCA_POLICE_RESULT-1])
+		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+	p->toks = p->burst = parm->burst;
+	p->mtu = parm->mtu;
+	if (p->mtu == 0) {
+		p->mtu = ~0;
+		if (p->R_tab)
+			p->mtu = 255<<p->R_tab->rate.cell_log;
+	}
+	if (p->P_tab)
+		p->ptoks = L2T_P(p, p->mtu);
+	p->action = parm->action;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	if (tb[TCA_POLICE_AVRATE-1])
+		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+	if (est)
+		gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+#endif
+
+	spin_unlock_bh(&p->lock);
+	if (ret != ACT_P_CREATED)
+		return ret;
+
+	PSCHED_GET_TIME(p->t_c);
+	p->index = parm->index ? : tcf_police_new_index();
+	h = tcf_police_hash(p->index);
+	write_lock_bh(&police_lock);
+	p->next = tcf_police_ht[h];
+	tcf_police_ht[h] = p;
+	write_unlock_bh(&police_lock);
+
+	a->priv = p;
+	return ret;
+
+failure:
+	if (ret == ACT_P_CREATED)
+		kfree(p);
+	return err;
+}
+
+static int tcf_act_police_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_police *p = PRIV(a);
+
+	if (p != NULL)
+		return tcf_police_release(p, bind);
+	return 0;
+}
+
+static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a)
+{
+	psched_time_t now;
+	struct sk_buff *skb = *pskb;
+	struct tcf_police *p = PRIV(a);
+	long toks;
+	long ptoks = 0;
+
+	spin_lock(&p->lock);
+
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
+		p->qstats.overlimits++;
+		spin_unlock(&p->lock);
+		return p->action;
+	}
+#endif
+
+	if (skb->len <= p->mtu) {
+		if (p->R_tab == NULL) {
+			spin_unlock(&p->lock);
+			return p->result;
+		}
+
+		PSCHED_GET_TIME(now);
+
+		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
+
+		if (p->P_tab) {
+			ptoks = toks + p->ptoks;
+			if (ptoks > (long)L2T_P(p, p->mtu))
+				ptoks = (long)L2T_P(p, p->mtu);
+			ptoks -= L2T_P(p, skb->len);
+		}
+		toks += p->toks;
+		if (toks > (long)p->burst)
+			toks = p->burst;
+		toks -= L2T(p, skb->len);
+
+		if ((toks|ptoks) >= 0) {
+			p->t_c = now;
+			p->toks = toks;
+			p->ptoks = ptoks;
+			spin_unlock(&p->lock);
+			return p->result;
+		}
+	}
+
+	p->qstats.overlimits++;
+	spin_unlock(&p->lock);
+	return p->action;
+}
+
+static int
+tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_police opt;
+	struct tcf_police *p = PRIV(a);
+
+	opt.index = p->index;
+	opt.action = p->action;
+	opt.mtu = p->mtu;
+	opt.burst = p->burst;
+	opt.refcnt = p->refcnt - ref;
+	opt.bindcnt = p->bindcnt - bind;
+	if (p->R_tab)
+		opt.rate = p->R_tab->rate;
+	else
+		memset(&opt.rate, 0, sizeof(opt.rate));
+	if (p->P_tab)
+		opt.peakrate = p->P_tab->rate;
+	else
+		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+	if (p->result)
+		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+#ifdef CONFIG_NET_ESTIMATOR
+	if (p->ewma_rate)
+		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+#endif
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+MODULE_AUTHOR("Alexey Kuznetsov");
+MODULE_DESCRIPTION("Policing actions");
+MODULE_LICENSE("GPL");
+
+static struct tc_action_ops act_police_ops = {
+	.kind		=	"police",
+	.type		=	TCA_ID_POLICE,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_act_police,
+	.dump		=	tcf_act_police_dump,
+	.cleanup	=	tcf_act_police_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_act_police_locate,
+	.walk		=	tcf_generic_walker
+};
+
+static int __init
+police_init_module(void)
+{
+	return tcf_register_action(&act_police_ops);
+}
+
+static void __exit
+police_cleanup_module(void)
+{
+	tcf_unregister_action(&act_police_ops);
+}
+
+module_init(police_init_module);
+module_exit(police_cleanup_module);
+
+#endif
+
+struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+{
+	unsigned h;
+	struct tcf_police *p;
+	struct rtattr *tb[TCA_POLICE_MAX];
+	struct tc_police *parm;
+
+	if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
+		return NULL;
+
+	if (tb[TCA_POLICE_TBF-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
+		return NULL;
+
+	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
+
+	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
+		p->refcnt++;
+		return p;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return NULL;
+
+	memset(p, 0, sizeof(*p));
+	p->refcnt = 1;
+	spin_lock_init(&p->lock);
+	p->stats_lock = &p->lock;
+	if (parm->rate.rate) {
+		p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+		if (p->R_tab == NULL)
+			goto failure;
+		if (parm->peakrate.rate) {
+			p->P_tab = qdisc_get_rtab(&parm->peakrate,
+			                          tb[TCA_POLICE_PEAKRATE-1]);
+			if (p->P_tab == NULL)
+				goto failure;
+		}
+	}
+	if (tb[TCA_POLICE_RESULT-1]) {
+		if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+			goto failure;
+		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+	}
+#ifdef CONFIG_NET_ESTIMATOR
+	if (tb[TCA_POLICE_AVRATE-1]) {
+		if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
+			goto failure;
+		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+	}
+#endif
+	p->toks = p->burst = parm->burst;
+	p->mtu = parm->mtu;
+	if (p->mtu == 0) {
+		p->mtu = ~0;
+		if (p->R_tab)
+			p->mtu = 255<<p->R_tab->rate.cell_log;
+	}
+	if (p->P_tab)
+		p->ptoks = L2T_P(p, p->mtu);
+	PSCHED_GET_TIME(p->t_c);
+	p->index = parm->index ? : tcf_police_new_index();
+	p->action = parm->action;
+#ifdef CONFIG_NET_ESTIMATOR
+	if (est)
+		gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+#endif
+	h = tcf_police_hash(p->index);
+	write_lock_bh(&police_lock);
+	p->next = tcf_police_ht[h];
+	tcf_police_ht[h] = p;
+	write_unlock_bh(&police_lock);
+	return p;
+
+failure:
+	if (p->R_tab)
+		qdisc_put_rtab(p->R_tab);
+	kfree(p);
+	return NULL;
+}
+
+int tcf_police(struct sk_buff *skb, struct tcf_police *p)
+{
+	psched_time_t now;
+	long toks;
+	long ptoks = 0;
+
+	spin_lock(&p->lock);
+
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
+		p->qstats.overlimits++;
+		spin_unlock(&p->lock);
+		return p->action;
+	}
+#endif
+
+	if (skb->len <= p->mtu) {
+		if (p->R_tab == NULL) {
+			spin_unlock(&p->lock);
+			return p->result;
+		}
+
+		PSCHED_GET_TIME(now);
+
+		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
+
+		if (p->P_tab) {
+			ptoks = toks + p->ptoks;
+			if (ptoks > (long)L2T_P(p, p->mtu))
+				ptoks = (long)L2T_P(p, p->mtu);
+			ptoks -= L2T_P(p, skb->len);
+		}
+		toks += p->toks;
+		if (toks > (long)p->burst)
+			toks = p->burst;
+		toks -= L2T(p, skb->len);
+
+		if ((toks|ptoks) >= 0) {
+			p->t_c = now;
+			p->toks = toks;
+			p->ptoks = ptoks;
+			spin_unlock(&p->lock);
+			return p->result;
+		}
+	}
+
+	p->qstats.overlimits++;
+	spin_unlock(&p->lock);
+	return p->action;
+}
+
+int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_police opt;
+
+	opt.index = p->index;
+	opt.action = p->action;
+	opt.mtu = p->mtu;
+	opt.burst = p->burst;
+	if (p->R_tab)
+		opt.rate = p->R_tab->rate;
+	else
+		memset(&opt.rate, 0, sizeof(opt.rate));
+	if (p->P_tab)
+		opt.peakrate = p->P_tab->rate;
+	else
+		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+	if (p->result)
+		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+#ifdef CONFIG_NET_ESTIMATOR
+	if (p->ewma_rate)
+		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+#endif
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
+{
+	struct gnet_dump d;
+	
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+			TCA_XSTATS, p->stats_lock, &d) < 0)
+		goto errout;
+	
+	if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+	    gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
+#endif
+	    gnet_stats_copy_queue(&d, &p->qstats) < 0)
+		goto errout;
+
+	if (gnet_stats_finish_copy(&d) < 0)
+		goto errout;
+
+	return 0;
+
+errout:
+	return -1;
+}
+
+
+EXPORT_SYMBOL(tcf_police);
+EXPORT_SYMBOL(tcf_police_destroy);
+EXPORT_SYMBOL(tcf_police_dump);
+EXPORT_SYMBOL(tcf_police_dump_stats);
+EXPORT_SYMBOL(tcf_police_hash);
+EXPORT_SYMBOL(tcf_police_ht);
+EXPORT_SYMBOL(tcf_police_locate);
+EXPORT_SYMBOL(tcf_police_lookup);
+EXPORT_SYMBOL(tcf_police_new_index);
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -0,0 +1,735 @@
+/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
+
+/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+#include <linux/atmdev.h>
+#include <linux/atmclip.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/file.h> /* for fput */
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+
+extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */
+
+#if 0 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0 /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+
+/*
+ * The ATM queuing discipline provides a framework for invoking classifiers
+ * (aka "filters"), which in turn select classes of this queuing discipline.
+ * Each class maps the flow(s) it is handling to a given VC. Multiple classes
+ * may share the same VC.
+ *
+ * When creating a class, VCs are specified by passing the number of the open
+ * socket descriptor by which the calling process references the VC. The kernel
+ * keeps the VC open at least until all classes using it are removed.
+ *
+ * In this file, most functions are named atm_tc_* to avoid confusion with all
+ * the atm_* in net/atm. This naming convention differs from what's used in the
+ * rest of net/sched.
+ *
+ * Known bugs:
+ *  - sometimes messes up the IP stack
+ *  - any manipulations besides the few operations described in the README, are
+ *    untested and likely to crash the system
+ *  - should lock the flow while there is data in the queue (?)
+ */
+
+
+#define PRIV(sch) qdisc_priv(sch)
+#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
+
+
+struct atm_flow_data {
+	struct Qdisc		*q;		/* FIFO, TBF, etc. */
+	struct tcf_proto	*filter_list;
+	struct atm_vcc		*vcc;		/* VCC; NULL if VCC is closed */
+	void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); /* chaining */
+	struct atm_qdisc_data	*parent;	/* parent qdisc */
+	struct socket		*sock;		/* for closing */
+	u32			classid;	/* x:y type ID */
+	int			ref;		/* reference count */
+	struct gnet_stats_basic	bstats;
+	struct gnet_stats_queue	qstats;
+	spinlock_t		*stats_lock;
+	struct atm_flow_data	*next;
+	struct atm_flow_data	*excess;	/* flow for excess traffic;
+						   NULL to set CLP instead */
+	int			hdr_len;
+	unsigned char		hdr[0];		/* header data; MUST BE LAST */
+};
+
+struct atm_qdisc_data {
+	struct atm_flow_data	link;		/* unclassified skbs go here */
+	struct atm_flow_data	*flows;		/* NB: "link" is also on this
+						   list */
+	struct tasklet_struct	task;		/* requeue tasklet */
+};
+
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+
+static int find_flow(struct atm_qdisc_data *qdisc,struct atm_flow_data *flow)
+{
+	struct atm_flow_data *walk;
+
+	DPRINTK("find_flow(qdisc %p,flow %p)\n",qdisc,flow);
+	for (walk = qdisc->flows; walk; walk = walk->next)
+		if (walk == flow) return 1;
+	DPRINTK("find_flow: not found\n");
+	return 0;
+}
+
+
+static __inline__ struct atm_flow_data *lookup_flow(struct Qdisc *sch,
+    u32 classid)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+
+        for (flow = p->flows; flow; flow = flow->next)
+		if (flow->classid == classid) break;
+	return flow;
+}
+
+
+static int atm_tc_graft(struct Qdisc *sch,unsigned long arg,
+    struct Qdisc *new,struct Qdisc **old)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) arg;
+
+	DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",sch,
+	    p,flow,new,old);
+	if (!find_flow(p,flow)) return -EINVAL;
+	if (!new) new = &noop_qdisc;
+	*old = xchg(&flow->q,new);
+	if (*old) qdisc_reset(*old);
+        return 0;
+}
+
+
+static struct Qdisc *atm_tc_leaf(struct Qdisc *sch,unsigned long cl)
+{
+	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
+
+	DPRINTK("atm_tc_leaf(sch %p,flow %p)\n",sch,flow);
+	return flow ? flow->q : NULL;
+}
+
+
+static unsigned long atm_tc_get(struct Qdisc *sch,u32 classid)
+{
+	struct atm_qdisc_data *p __attribute__((unused)) = PRIV(sch);
+	struct atm_flow_data *flow;
+
+	DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid);
+	flow = lookup_flow(sch,classid);
+        if (flow) flow->ref++;
+	DPRINTK("atm_tc_get: flow %p\n",flow);
+	return (unsigned long) flow;
+}
+
+
+static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
+    unsigned long parent, u32 classid)
+{
+	return atm_tc_get(sch,classid);
+}
+
+
+static void destroy_filters(struct atm_flow_data *flow)
+{
+	struct tcf_proto *filter;
+
+	while ((filter = flow->filter_list)) {
+		DPRINTK("destroy_filters: destroying filter %p\n",filter);
+		flow->filter_list = filter->next;
+		tcf_destroy(filter);
+	}
+}
+
+
+/*
+ * atm_tc_put handles all destructions, including the ones that are explicitly
+ * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
+ * anything that still seems to be in use.
+ */
+
+static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
+	struct atm_flow_data **prev;
+
+	DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n",sch,p,flow);
+	if (--flow->ref) return;
+	DPRINTK("atm_tc_put: destroying\n");
+	for (prev = &p->flows; *prev; prev = &(*prev)->next)
+		if (*prev == flow) break;
+	if (!*prev) {
+		printk(KERN_CRIT "atm_tc_put: class %p not found\n",flow);
+		return;
+	}
+	*prev = flow->next;
+	DPRINTK("atm_tc_put: qdisc %p\n",flow->q);
+	qdisc_destroy(flow->q);
+	destroy_filters(flow);
+	if (flow->sock) {
+		DPRINTK("atm_tc_put: f_count %d\n",
+		    file_count(flow->sock->file));
+		flow->vcc->pop = flow->old_pop;
+		sockfd_put(flow->sock);
+	}
+	if (flow->excess) atm_tc_put(sch,(unsigned long) flow->excess);
+	if (flow != &p->link) kfree(flow);
+	/*
+	 * If flow == &p->link, the qdisc no longer works at this point and
+	 * needs to be removed. (By the caller of atm_tc_put.)
+	 */
+}
+
+
+static void sch_atm_pop(struct atm_vcc *vcc,struct sk_buff *skb)
+{
+	struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
+
+	D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n",vcc,skb,p);
+	VCC2FLOW(vcc)->old_pop(vcc,skb);
+	tasklet_schedule(&p->task);
+}
+
+static const u8 llc_oui_ip[] = {
+	0xaa,		/* DSAP: non-ISO */
+	0xaa,		/* SSAP: non-ISO */
+	0x03,		/* Ctrl: Unnumbered Information Command PDU */
+	0x00,		/* OUI: EtherType */
+	0x00, 0x00,
+	0x08, 0x00 };	/* Ethertype IP (0800) */
+
+static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
+    struct rtattr **tca, unsigned long *arg)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) *arg;
+	struct atm_flow_data *excess = NULL;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_ATM_MAX];
+	struct socket *sock;
+	int fd,error,hdr_len;
+	void *hdr;
+
+	DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
+	    "flow %p,opt %p)\n",sch,p,classid,parent,flow,opt);
+	/*
+	 * The concept of parents doesn't apply for this qdisc.
+	 */
+	if (parent && parent != TC_H_ROOT && parent != sch->handle)
+		return -EINVAL;
+	/*
+	 * ATM classes cannot be changed. In order to change properties of the
+	 * ATM connection, that socket needs to be modified directly (via the
+	 * native ATM API. In order to send a flow to a different VC, the old
+	 * class needs to be removed and a new one added. (This may be changed
+	 * later.)
+	 */
+	if (flow) return -EBUSY;
+	if (opt == NULL || rtattr_parse_nested(tb, TCA_ATM_MAX, opt))
+		return -EINVAL;
+	if (!tb[TCA_ATM_FD-1] || RTA_PAYLOAD(tb[TCA_ATM_FD-1]) < sizeof(fd))
+		return -EINVAL;
+	fd = *(int *) RTA_DATA(tb[TCA_ATM_FD-1]);
+	DPRINTK("atm_tc_change: fd %d\n",fd);
+	if (tb[TCA_ATM_HDR-1]) {
+		hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR-1]);
+		hdr = RTA_DATA(tb[TCA_ATM_HDR-1]);
+	}
+	else {
+		hdr_len = RFC1483LLC_LEN;
+		hdr = NULL; /* default LLC/SNAP for IP */
+	}
+	if (!tb[TCA_ATM_EXCESS-1]) excess = NULL;
+	else {
+		if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS-1]) != sizeof(u32))
+			return -EINVAL;
+		excess = (struct atm_flow_data *) atm_tc_get(sch,
+		    *(u32 *) RTA_DATA(tb[TCA_ATM_EXCESS-1]));
+		if (!excess) return -ENOENT;
+	}
+	DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n",
+	    opt->rta_type,RTA_PAYLOAD(opt),hdr_len);
+	if (!(sock = sockfd_lookup(fd,&error))) return error; /* f_count++ */
+	DPRINTK("atm_tc_change: f_count %d\n",file_count(sock->file));
+        if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
+		error = -EPROTOTYPE;
+                goto err_out;
+	}
+	/* @@@ should check if the socket is really operational or we'll crash
+	   on vcc->send */
+	if (classid) {
+		if (TC_H_MAJ(classid ^ sch->handle)) {
+			DPRINTK("atm_tc_change: classid mismatch\n");
+			error = -EINVAL;
+			goto err_out;
+		}
+		if (find_flow(p,flow)) {
+			error = -EEXIST;
+			goto err_out;
+		}
+	}
+	else {
+		int i;
+		unsigned long cl;
+
+		for (i = 1; i < 0x8000; i++) {
+			classid = TC_H_MAKE(sch->handle,0x8000 | i);
+			if (!(cl = atm_tc_get(sch,classid))) break;
+			atm_tc_put(sch,cl);
+		}
+	}
+	DPRINTK("atm_tc_change: new id %x\n",classid);
+	flow = kmalloc(sizeof(struct atm_flow_data)+hdr_len,GFP_KERNEL);
+	DPRINTK("atm_tc_change: flow %p\n",flow);
+	if (!flow) {
+		error = -ENOBUFS;
+		goto err_out;
+	}
+	memset(flow,0,sizeof(*flow));
+	flow->filter_list = NULL;
+	if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
+		flow->q = &noop_qdisc;
+	DPRINTK("atm_tc_change: qdisc %p\n",flow->q);
+	flow->sock = sock;
+        flow->vcc = ATM_SD(sock); /* speedup */
+	flow->vcc->user_back = flow;
+        DPRINTK("atm_tc_change: vcc %p\n",flow->vcc);
+	flow->old_pop = flow->vcc->pop;
+	flow->parent = p;
+	flow->vcc->pop = sch_atm_pop;
+	flow->classid = classid;
+	flow->ref = 1;
+	flow->excess = excess;
+	flow->next = p->link.next;
+	p->link.next = flow;
+	flow->hdr_len = hdr_len;
+	if (hdr)
+		memcpy(flow->hdr,hdr,hdr_len);
+	else
+		memcpy(flow->hdr,llc_oui_ip,sizeof(llc_oui_ip));
+	*arg = (unsigned long) flow;
+	return 0;
+err_out:
+	if (excess) atm_tc_put(sch,(unsigned long) excess);
+	sockfd_put(sock);
+	return error;
+}
+
+
+static int atm_tc_delete(struct Qdisc *sch,unsigned long arg)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) arg;
+
+	DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n",sch,p,flow);
+	if (!find_flow(PRIV(sch),flow)) return -EINVAL;
+	if (flow->filter_list || flow == &p->link) return -EBUSY;
+	/*
+	 * Reference count must be 2: one for "keepalive" (set at class
+	 * creation), and one for the reference held when calling delete.
+	 */
+	if (flow->ref < 2) {
+		printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n",flow->ref);
+		return -EINVAL;
+	}
+	if (flow->ref > 2) return -EBUSY; /* catch references via excess, etc.*/
+	atm_tc_put(sch,arg);
+	return 0;
+}
+
+
+static void atm_tc_walk(struct Qdisc *sch,struct qdisc_walker *walker)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+
+	DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker);
+	if (walker->stop) return;
+	for (flow = p->flows; flow; flow = flow->next) {
+		if (walker->count >= walker->skip)
+			if (walker->fn(sch,(unsigned long) flow,walker) < 0) {
+				walker->stop = 1;
+				break;
+			}
+		walker->count++;
+	}
+}
+
+
+static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch,unsigned long cl)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
+
+	DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n",sch,p,flow);
+        return flow ? &flow->filter_list : &p->link.filter_list;
+}
+
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+
+static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = NULL ; /* @@@ */
+	struct tcf_result res;
+	int result;
+	int ret = NET_XMIT_POLICED;
+
+	D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
+	result = TC_POLICE_OK; /* be nice to gcc */
+	if (TC_H_MAJ(skb->priority) != sch->handle ||
+	    !(flow = (struct atm_flow_data *) atm_tc_get(sch,skb->priority)))
+		for (flow = p->flows; flow; flow = flow->next)
+			if (flow->filter_list) {
+				result = tc_classify(skb,flow->filter_list,
+				    &res);
+				if (result < 0) continue;
+				flow = (struct atm_flow_data *) res.class;
+				if (!flow) flow = lookup_flow(sch,res.classid);
+				break;
+			}
+	if (!flow) flow = &p->link;
+	else {
+		if (flow->vcc)
+			ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
+			/*@@@ looks good ... but it's not supposed to work :-)*/
+#ifdef CONFIG_NET_CLS_POLICE
+		switch (result) {
+			case TC_POLICE_SHOT:
+				kfree_skb(skb);
+				break;
+			case TC_POLICE_RECLASSIFY:
+				if (flow->excess) flow = flow->excess;
+				else {
+					ATM_SKB(skb)->atm_options |=
+					    ATM_ATMOPT_CLP;
+					break;
+				}
+				/* fall through */
+			case TC_POLICE_OK:
+				/* fall through */
+			default:
+				break;
+		}
+#endif
+	}
+	if (
+#ifdef CONFIG_NET_CLS_POLICE
+	    result == TC_POLICE_SHOT ||
+#endif
+	    (ret = flow->q->enqueue(skb,flow->q)) != 0) {
+		sch->qstats.drops++;
+		if (flow) flow->qstats.drops++;
+		return ret;
+	}
+	sch->bstats.bytes += skb->len;
+	sch->bstats.packets++;
+	flow->bstats.bytes += skb->len;
+	flow->bstats.packets++;
+	/*
+	 * Okay, this may seem weird. We pretend we've dropped the packet if
+	 * it goes via ATM. The reason for this is that the outer qdisc
+	 * expects to be able to q->dequeue the packet later on if we return
+	 * success at this place. Also, sch->q.qdisc needs to reflect whether
+	 * there is a packet egligible for dequeuing or not. Note that the
+	 * statistics of the outer qdisc are necessarily wrong because of all
+	 * this. There's currently no correct solution for this.
+	 */
+	if (flow == &p->link) {
+		sch->q.qlen++;
+		return 0;
+	}
+	tasklet_schedule(&p->task);
+	return NET_XMIT_BYPASS;
+}
+
+
+/*
+ * Dequeue packets and send them over ATM. Note that we quite deliberately
+ * avoid checking net_device's flow control here, simply because sch_atm
+ * uses its own channels, which have nothing to do with any CLIP/LANE/or
+ * non-ATM interfaces.
+ */
+
+
+static void sch_atm_dequeue(unsigned long data)
+{
+	struct Qdisc *sch = (struct Qdisc *) data;
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+	struct sk_buff *skb;
+
+	D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n",sch,p);
+	for (flow = p->link.next; flow; flow = flow->next)
+		/*
+		 * If traffic is properly shaped, this won't generate nasty
+		 * little bursts. Otherwise, it may ... (but that's okay)
+		 */
+		while ((skb = flow->q->dequeue(flow->q))) {
+			if (!atm_may_send(flow->vcc,skb->truesize)) {
+				(void) flow->q->ops->requeue(skb,flow->q);
+				break;
+			}
+			D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow);
+			/* remove any LL header somebody else has attached */
+			skb_pull(skb,(char *) skb->nh.iph-(char *) skb->data);
+			if (skb_headroom(skb) < flow->hdr_len) {
+				struct sk_buff *new;
+
+				new = skb_realloc_headroom(skb,flow->hdr_len);
+				dev_kfree_skb(skb);
+				if (!new) continue;
+				skb = new;
+			}
+			D2PRINTK("sch_atm_dequeue: ip %p, data %p\n",
+			    skb->nh.iph,skb->data);
+			ATM_SKB(skb)->vcc = flow->vcc;
+			memcpy(skb_push(skb,flow->hdr_len),flow->hdr,
+			    flow->hdr_len);
+			atomic_add(skb->truesize,
+				   &sk_atm(flow->vcc)->sk_wmem_alloc);
+			/* atm.atm_options are already set by atm_tc_enqueue */
+			(void) flow->vcc->send(flow->vcc,skb);
+		}
+}
+
+
+static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct sk_buff *skb;
+
+	D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n",sch,p);
+	tasklet_schedule(&p->task);
+	skb = p->link.q->dequeue(p->link.q);
+	if (skb) sch->q.qlen--;
+	return skb;
+}
+
+
+static int atm_tc_requeue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	int ret;
+
+	D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
+	ret = p->link.q->ops->requeue(skb,p->link.q);
+	if (!ret) {
+        sch->q.qlen++;
+        sch->qstats.requeues++;
+    } else {
+		sch->qstats.drops++;
+		p->link.qstats.drops++;
+	}
+	return ret;
+}
+
+
+static unsigned int atm_tc_drop(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+	unsigned int len;
+
+	DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n",sch,p);
+	for (flow = p->flows; flow; flow = flow->next)
+		if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
+			return len;
+	return 0;
+}
+
+
+static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+
+	DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
+	p->flows = &p->link;
+	if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
+		p->link.q = &noop_qdisc;
+	DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q);
+	p->link.filter_list = NULL;
+	p->link.vcc = NULL;
+	p->link.sock = NULL;
+	p->link.classid = sch->handle;
+	p->link.ref = 1;
+	p->link.next = NULL;
+	tasklet_init(&p->task,sch_atm_dequeue,(unsigned long) sch);
+	return 0;
+}
+
+
+static void atm_tc_reset(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+
+	DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n",sch,p);
+	for (flow = p->flows; flow; flow = flow->next) qdisc_reset(flow->q);
+	sch->q.qlen = 0;
+}
+
+
+static void atm_tc_destroy(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+
+	DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p);
+	/* races ? */
+	while ((flow = p->flows)) {
+		destroy_filters(flow);
+		if (flow->ref > 1)
+			printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow,
+			    flow->ref);
+		atm_tc_put(sch,(unsigned long) flow);
+		if (p->flows == flow) {
+			printk(KERN_ERR "atm_destroy: putting flow %p didn't "
+			    "kill it\n",flow);
+			p->flows = flow->next; /* brute force */
+			break;
+		}
+	}
+	tasklet_kill(&p->task);
+}
+
+
+static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
+    struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
+	    sch,p,flow,skb,tcm);
+	if (!find_flow(p,flow)) return -EINVAL;
+	tcm->tcm_handle = flow->classid;
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	RTA_PUT(skb,TCA_ATM_HDR,flow->hdr_len,flow->hdr);
+	if (flow->vcc) {
+		struct sockaddr_atmpvc pvc;
+		int state;
+
+		pvc.sap_family = AF_ATMPVC;
+		pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
+		pvc.sap_addr.vpi = flow->vcc->vpi;
+		pvc.sap_addr.vci = flow->vcc->vci;
+		RTA_PUT(skb,TCA_ATM_ADDR,sizeof(pvc),&pvc);
+		state = ATM_VF2VS(flow->vcc->flags);
+		RTA_PUT(skb,TCA_ATM_STATE,sizeof(state),&state);
+	}
+	if (flow->excess)
+		RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(u32),&flow->classid);
+	else {
+		static u32 zero;
+
+		RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero);
+	}
+	rta->rta_len = skb->tail-b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb,b-skb->data);
+	return -1;
+}
+static int
+atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+	struct gnet_dump *d)
+{
+	struct atm_flow_data *flow = (struct atm_flow_data *) arg;
+
+	flow->qstats.qlen = flow->q->q.qlen;
+
+	if (gnet_stats_copy_basic(d, &flow->bstats) < 0 ||
+	    gnet_stats_copy_queue(d, &flow->qstats) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct Qdisc_class_ops atm_class_ops = {
+	.graft		=	atm_tc_graft,
+	.leaf		=	atm_tc_leaf,
+	.get		=	atm_tc_get,
+	.put		=	atm_tc_put,
+	.change		=	atm_tc_change,
+	.delete		=	atm_tc_delete,
+	.walk		=	atm_tc_walk,
+	.tcf_chain	=	atm_tc_find_tcf,
+	.bind_tcf	=	atm_tc_bind_filter,
+	.unbind_tcf	=	atm_tc_put,
+	.dump		=	atm_tc_dump_class,
+	.dump_stats	=	atm_tc_dump_class_stats,
+};
+
+static struct Qdisc_ops atm_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&atm_class_ops,
+	.id		=	"atm",
+	.priv_size	=	sizeof(struct atm_qdisc_data),
+	.enqueue	=	atm_tc_enqueue,
+	.dequeue	=	atm_tc_dequeue,
+	.requeue	=	atm_tc_requeue,
+	.drop		=	atm_tc_drop,
+	.init		=	atm_tc_init,
+	.reset		=	atm_tc_reset,
+	.destroy	=	atm_tc_destroy,
+	.change		=	NULL,
+	.dump		=	atm_tc_dump,
+	.owner		=	THIS_MODULE,
+};
+
+
+static int __init atm_init(void)
+{
+	return register_qdisc(&atm_qdisc_ops);
+}
+
+static void __exit atm_exit(void) 
+{
+	unregister_qdisc(&atm_qdisc_ops);
+}
+
+module_init(atm_init)
+module_exit(atm_exit)
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -0,0 +1,479 @@
+/* net/sched/sch_dsmark.c - Differentiated Services field marker */
+
+/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h> /* for pkt_sched */
+#include <linux/rtnetlink.h>
+#include <net/pkt_sched.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <asm/byteorder.h>
+
+
+#if 1 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0 /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+
+#define PRIV(sch) qdisc_priv(sch)
+
+
+/*
+ * classid	class		marking
+ * -------	-----		-------
+ *   n/a	  0		n/a
+ *   x:0	  1		use entry [0]
+ *   ...	 ...		...
+ *   x:y y>0	 y+1		use entry [y]
+ *   ...	 ...		...
+ * x:indices-1	indices		use entry [indices-1]
+ *   ...	 ...		...
+ *   x:y	 y+1		use entry [y & (indices-1)]
+ *   ...	 ...		...
+ * 0xffff	0x10000		use entry [indices-1]
+ */
+
+
+#define NO_DEFAULT_INDEX	(1 << 16)
+
+struct dsmark_qdisc_data {
+	struct Qdisc		*q;
+	struct tcf_proto	*filter_list;
+	__u8			*mask;	/* "owns" the array */
+	__u8			*value;
+	__u16			indices;
+	__u32			default_index;	/* index range is 0...0xffff */
+	int			set_tc_index;
+};
+
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+
+static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
+    struct Qdisc *new,struct Qdisc **old)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new,
+	    old);
+	if (!new)
+		new = &noop_qdisc;
+	sch_tree_lock(sch);
+	*old = xchg(&p->q,new);
+	if (*old)
+		qdisc_reset(*old);
+	sch->q.qlen = 0;
+	sch_tree_unlock(sch); /* @@@ move up ? */
+        return 0;
+}
+
+
+static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	return p->q;
+}
+
+
+static unsigned long dsmark_get(struct Qdisc *sch,u32 classid)
+{
+	struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch);
+
+	DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid);
+	return TC_H_MIN(classid)+1;
+}
+
+
+static unsigned long dsmark_bind_filter(struct Qdisc *sch,
+    unsigned long parent, u32 classid)
+{
+	return dsmark_get(sch,classid);
+}
+
+
+static void dsmark_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+
+static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
+    struct rtattr **tca, unsigned long *arg)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_DSMARK_MAX];
+
+	DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
+	    "arg 0x%lx\n",sch,p,classid,parent,*arg);
+	if (*arg > p->indices)
+		return -ENOENT;
+	if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
+		return -EINVAL;
+	if (tb[TCA_DSMARK_MASK-1]) {
+		if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK-1]))
+			return -EINVAL;
+		p->mask[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_MASK-1]);
+	}
+	if (tb[TCA_DSMARK_VALUE-1]) {
+		if (!RTA_PAYLOAD(tb[TCA_DSMARK_VALUE-1]))
+			return -EINVAL;
+		p->value[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_VALUE-1]);
+	}
+	return 0;
+}
+
+
+static int dsmark_delete(struct Qdisc *sch,unsigned long arg)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	if (!arg || arg > p->indices)
+		return -EINVAL;
+	p->mask[arg-1] = 0xff;
+	p->value[arg-1] = 0;
+	return 0;
+}
+
+
+static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	int i;
+
+	DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker);
+	if (walker->stop)
+		return;
+	for (i = 0; i < p->indices; i++) {
+		if (p->mask[i] == 0xff && !p->value[i])
+			continue;
+		if (walker->count >= walker->skip) {
+			if (walker->fn(sch, i+1, walker) < 0) {
+				walker->stop = 1;
+				break;
+			}
+		}
+                walker->count++;
+        }
+}
+
+
+static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	return &p->filter_list;
+}
+
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+
+static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct tcf_result res;
+	int result;
+	int ret = NET_XMIT_POLICED;
+
+	D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
+	if (p->set_tc_index) {
+		/* FIXME: Safe with non-linear skbs? --RR */
+		switch (skb->protocol) {
+			case __constant_htons(ETH_P_IP):
+				skb->tc_index = ipv4_get_dsfield(skb->nh.iph)
+					& ~INET_ECN_MASK;
+				break;
+			case __constant_htons(ETH_P_IPV6):
+				skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h)
+					& ~INET_ECN_MASK;
+				break;
+			default:
+				skb->tc_index = 0;
+				break;
+		};
+	}
+	result = TC_POLICE_OK; /* be nice to gcc */
+	if (TC_H_MAJ(skb->priority) == sch->handle) {
+		skb->tc_index = TC_H_MIN(skb->priority);
+	} else {
+		result = tc_classify(skb,p->filter_list,&res);
+		D2PRINTK("result %d class 0x%04x\n",result,res.classid);
+		switch (result) {
+#ifdef CONFIG_NET_CLS_POLICE
+			case TC_POLICE_SHOT:
+				kfree_skb(skb);
+				break;
+#if 0
+			case TC_POLICE_RECLASSIFY:
+				/* FIXME: what to do here ??? */
+#endif
+#endif
+			case TC_POLICE_OK:
+				skb->tc_index = TC_H_MIN(res.classid);
+				break;
+			case TC_POLICE_UNSPEC:
+				/* fall through */
+			default:
+				if (p->default_index != NO_DEFAULT_INDEX)
+					skb->tc_index = p->default_index;
+				break;
+		};
+	}
+	if (
+#ifdef CONFIG_NET_CLS_POLICE
+	    result == TC_POLICE_SHOT ||
+#endif
+
+	    ((ret = p->q->enqueue(skb,p->q)) != 0)) {
+		sch->qstats.drops++;
+		return ret;
+	}
+	sch->bstats.bytes += skb->len;
+	sch->bstats.packets++;
+	sch->q.qlen++;
+	return ret;
+}
+
+
+static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct sk_buff *skb;
+	int index;
+
+	D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p);
+	skb = p->q->ops->dequeue(p->q);
+	if (!skb)
+		return NULL;
+	sch->q.qlen--;
+	index = skb->tc_index & (p->indices-1);
+	D2PRINTK("index %d->%d\n",skb->tc_index,index);
+	switch (skb->protocol) {
+		case __constant_htons(ETH_P_IP):
+			ipv4_change_dsfield(skb->nh.iph,
+			    p->mask[index],p->value[index]);
+			break;
+		case __constant_htons(ETH_P_IPV6):
+			ipv6_change_dsfield(skb->nh.ipv6h,
+			    p->mask[index],p->value[index]);
+			break;
+		default:
+			/*
+			 * Only complain if a change was actually attempted.
+			 * This way, we can send non-IP traffic through dsmark
+			 * and don't need yet another qdisc as a bypass.
+			 */
+			if (p->mask[index] != 0xff || p->value[index])
+				printk(KERN_WARNING "dsmark_dequeue: "
+				       "unsupported protocol %d\n",
+				       htons(skb->protocol));
+			break;
+	};
+	return skb;
+}
+
+
+static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	int ret;
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
+        if ((ret = p->q->ops->requeue(skb, p->q)) == 0) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+		return 0;
+	}
+	sch->qstats.drops++;
+	return ret;
+}
+
+
+static unsigned int dsmark_drop(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	unsigned int len;
+	
+	DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p);
+	if (!p->q->ops->drop)
+		return 0;
+	if (!(len = p->q->ops->drop(p->q)))
+		return 0;
+	sch->q.qlen--;
+	return len;
+}
+
+
+static int dsmark_init(struct Qdisc *sch,struct rtattr *opt)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct rtattr *tb[TCA_DSMARK_MAX];
+	__u16 tmp;
+
+	DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
+	if (!opt ||
+	    rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 ||
+	    !tb[TCA_DSMARK_INDICES-1] ||
+	    RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16))
+                return -EINVAL;
+	p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]);
+	if (!p->indices)
+		return -EINVAL;
+	for (tmp = p->indices; tmp != 1; tmp >>= 1) {
+		if (tmp & 1)
+			return -EINVAL;
+	}
+	p->default_index = NO_DEFAULT_INDEX;
+	if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) {
+		if (RTA_PAYLOAD(tb[TCA_DSMARK_DEFAULT_INDEX-1]) < sizeof(__u16))
+			return -EINVAL;
+		p->default_index =
+		    *(__u16 *) RTA_DATA(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
+	}
+	p->set_tc_index = !!tb[TCA_DSMARK_SET_TC_INDEX-1];
+	p->mask = kmalloc(p->indices*2,GFP_KERNEL);
+	if (!p->mask)
+		return -ENOMEM;
+	p->value = p->mask+p->indices;
+	memset(p->mask,0xff,p->indices);
+	memset(p->value,0,p->indices);
+	if (!(p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
+		p->q = &noop_qdisc;
+	DPRINTK("dsmark_init: qdisc %p\n",&p->q);
+	return 0;
+}
+
+
+static void dsmark_reset(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p);
+	qdisc_reset(p->q);
+	sch->q.qlen = 0;
+}
+
+
+static void dsmark_destroy(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct tcf_proto *tp;
+
+	DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p);
+	while (p->filter_list) {
+		tp = p->filter_list;
+		p->filter_list = tp->next;
+		tcf_destroy(tp);
+	}
+	qdisc_destroy(p->q);
+	kfree(p->mask);
+}
+
+
+static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
+    struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl);
+	if (!cl || cl > p->indices)
+		return -EINVAL;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1);
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]);
+	RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]);
+	rta->rta_len = skb->tail-b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb,b-skb->data);
+	return -1;
+}
+
+static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices);
+	if (p->default_index != NO_DEFAULT_INDEX) {
+		__u16 tmp = p->default_index;
+
+		RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp);
+	}
+	if (p->set_tc_index)
+		RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL);
+	rta->rta_len = skb->tail-b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb,b-skb->data);
+	return -1;
+}
+
+static struct Qdisc_class_ops dsmark_class_ops = {
+	.graft		=	dsmark_graft,
+	.leaf		=	dsmark_leaf,
+	.get		=	dsmark_get,
+	.put		=	dsmark_put,
+	.change		=	dsmark_change,
+	.delete		=	dsmark_delete,
+	.walk		=	dsmark_walk,
+	.tcf_chain	=	dsmark_find_tcf,
+	.bind_tcf	=	dsmark_bind_filter,
+	.unbind_tcf	=	dsmark_put,
+	.dump		=	dsmark_dump_class,
+};
+
+static struct Qdisc_ops dsmark_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&dsmark_class_ops,
+	.id		=	"dsmark",
+	.priv_size	=	sizeof(struct dsmark_qdisc_data),
+	.enqueue	=	dsmark_enqueue,
+	.dequeue	=	dsmark_dequeue,
+	.requeue	=	dsmark_requeue,
+	.drop		=	dsmark_drop,
+	.init		=	dsmark_init,
+	.reset		=	dsmark_reset,
+	.destroy	=	dsmark_destroy,
+	.change		=	NULL,
+	.dump		=	dsmark_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init dsmark_module_init(void)
+{
+	return register_qdisc(&dsmark_qdisc_ops);
+}
+static void __exit dsmark_module_exit(void) 
+{
+	unregister_qdisc(&dsmark_qdisc_ops);
+}
+module_init(dsmark_module_init)
+module_exit(dsmark_module_exit)
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -0,0 +1,212 @@
+/*
+ * net/sched/sch_fifo.c	The simplest FIFO queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/* 1 band FIFO pseudo-"scheduler" */
+
+struct fifo_sched_data
+{
+	unsigned limit;
+};
+
+static int
+bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+
+	if (sch->qstats.backlog + skb->len <= q->limit) {
+		__skb_queue_tail(&sch->q, skb);
+		sch->qstats.backlog += skb->len;
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+	sch->qstats.drops++;
+#ifdef CONFIG_NET_CLS_POLICE
+	if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
+#endif
+		kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static int
+bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	__skb_queue_head(&sch->q, skb);
+	sch->qstats.backlog += skb->len;
+	sch->qstats.requeues++;
+	return 0;
+}
+
+static struct sk_buff *
+bfifo_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	skb = __skb_dequeue(&sch->q);
+	if (skb)
+		sch->qstats.backlog -= skb->len;
+	return skb;
+}
+
+static unsigned int 
+fifo_drop(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	skb = __skb_dequeue_tail(&sch->q);
+	if (skb) {
+		unsigned int len = skb->len;
+		sch->qstats.backlog -= len;
+		kfree_skb(skb);
+		return len;
+	}
+	return 0;
+}
+
+static void
+fifo_reset(struct Qdisc* sch)
+{
+	skb_queue_purge(&sch->q);
+	sch->qstats.backlog = 0;
+}
+
+static int
+pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+
+	if (sch->q.qlen < q->limit) {
+		__skb_queue_tail(&sch->q, skb);
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+	sch->qstats.drops++;
+#ifdef CONFIG_NET_CLS_POLICE
+	if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
+#endif
+		kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static int
+pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	__skb_queue_head(&sch->q, skb);
+	sch->qstats.requeues++;
+	return 0;
+}
+
+
+static struct sk_buff *
+pfifo_dequeue(struct Qdisc* sch)
+{
+	return __skb_dequeue(&sch->q);
+}
+
+static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+
+	if (opt == NULL) {
+		unsigned int limit = sch->dev->tx_queue_len ? : 1;
+
+		if (sch->ops == &bfifo_qdisc_ops)
+			q->limit = limit*sch->dev->mtu;
+		else	
+			q->limit = limit;
+	} else {
+		struct tc_fifo_qopt *ctl = RTA_DATA(opt);
+		if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+			return -EINVAL;
+		q->limit = ctl->limit;
+	}
+	return 0;
+}
+
+static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct tc_fifo_qopt opt;
+
+	opt.limit = q->limit;
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+struct Qdisc_ops pfifo_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"pfifo",
+	.priv_size	=	sizeof(struct fifo_sched_data),
+	.enqueue	=	pfifo_enqueue,
+	.dequeue	=	pfifo_dequeue,
+	.requeue	=	pfifo_requeue,
+	.drop		=	fifo_drop,
+	.init		=	fifo_init,
+	.reset		=	fifo_reset,
+	.destroy	=	NULL,
+	.change		=	fifo_init,
+	.dump		=	fifo_dump,
+	.owner		=	THIS_MODULE,
+};
+
+struct Qdisc_ops bfifo_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"bfifo",
+	.priv_size	=	sizeof(struct fifo_sched_data),
+	.enqueue	=	bfifo_enqueue,
+	.dequeue	=	bfifo_dequeue,
+	.requeue	=	bfifo_requeue,
+	.drop		=	fifo_drop,
+	.init		=	fifo_init,
+	.reset		=	fifo_reset,
+	.destroy	=	NULL,
+	.change		=	fifo_init,
+	.dump		=	fifo_dump,
+	.owner		=	THIS_MODULE,
+};
+
+EXPORT_SYMBOL(bfifo_qdisc_ops);
+EXPORT_SYMBOL(pfifo_qdisc_ops);
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -0,0 +1,609 @@
+/*
+ * net/sched/sch_generic.c	Generic packet scheduler routines.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
+ *              - Ingress support
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/* Main transmission queue. */
+
+/* Main qdisc structure lock. 
+
+   However, modifications
+   to data, participating in scheduling must be additionally
+   protected with dev->queue_lock spinlock.
+
+   The idea is the following:
+   - enqueue, dequeue are serialized via top level device
+     spinlock dev->queue_lock.
+   - tree walking is protected by read_lock_bh(qdisc_tree_lock)
+     and this lock is used only in process context.
+   - updates to tree are made under rtnl semaphore or
+     from softirq context (__qdisc_destroy rcu-callback)
+     hence this lock needs local bh disabling.
+
+   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
+ */
+DEFINE_RWLOCK(qdisc_tree_lock);
+
+void qdisc_lock_tree(struct net_device *dev)
+{
+	write_lock_bh(&qdisc_tree_lock);
+	spin_lock_bh(&dev->queue_lock);
+}
+
+void qdisc_unlock_tree(struct net_device *dev)
+{
+	spin_unlock_bh(&dev->queue_lock);
+	write_unlock_bh(&qdisc_tree_lock);
+}
+
+/* 
+   dev->queue_lock serializes queue accesses for this device
+   AND dev->qdisc pointer itself.
+
+   dev->xmit_lock serializes accesses to device driver.
+
+   dev->queue_lock and dev->xmit_lock are mutually exclusive,
+   if one is grabbed, another must be free.
+ */
+
+
+/* Kick device.
+   Note, that this procedure can be called by a watchdog timer, so that
+   we do not check dev->tbusy flag here.
+
+   Returns:  0  - queue is empty.
+            >0  - queue is not empty, but throttled.
+	    <0  - queue is not empty. Device is throttled, if dev->tbusy != 0.
+
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+*/
+
+int qdisc_restart(struct net_device *dev)
+{
+	struct Qdisc *q = dev->qdisc;
+	struct sk_buff *skb;
+
+	/* Dequeue packet */
+	if ((skb = q->dequeue(q)) != NULL) {
+		unsigned nolock = (dev->features & NETIF_F_LLTX);
+		/*
+		 * When the driver has LLTX set it does its own locking
+		 * in start_xmit. No need to add additional overhead by
+		 * locking again. These checks are worth it because
+		 * even uncongested locks can be quite expensive.
+		 * The driver can do trylock like here too, in case
+		 * of lock congestion it should return -1 and the packet
+		 * will be requeued.
+		 */
+		if (!nolock) {
+			if (!spin_trylock(&dev->xmit_lock)) {
+			collision:
+				/* So, someone grabbed the driver. */
+				
+				/* It may be transient configuration error,
+				   when hard_start_xmit() recurses. We detect
+				   it by checking xmit owner and drop the
+				   packet when deadloop is detected.
+				*/
+				if (dev->xmit_lock_owner == smp_processor_id()) {
+					kfree_skb(skb);
+					if (net_ratelimit())
+						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+					return -1;
+				}
+				__get_cpu_var(netdev_rx_stat).cpu_collision++;
+				goto requeue;
+			}
+			/* Remember that the driver is grabbed by us. */
+			dev->xmit_lock_owner = smp_processor_id();
+		}
+		
+		{
+			/* And release queue */
+			spin_unlock(&dev->queue_lock);
+
+			if (!netif_queue_stopped(dev)) {
+				int ret;
+				if (netdev_nit)
+					dev_queue_xmit_nit(skb, dev);
+
+				ret = dev->hard_start_xmit(skb, dev);
+				if (ret == NETDEV_TX_OK) { 
+					if (!nolock) {
+						dev->xmit_lock_owner = -1;
+						spin_unlock(&dev->xmit_lock);
+					}
+					spin_lock(&dev->queue_lock);
+					return -1;
+				}
+				if (ret == NETDEV_TX_LOCKED && nolock) {
+					spin_lock(&dev->queue_lock);
+					goto collision; 
+				}
+			}
+
+			/* NETDEV_TX_BUSY - we need to requeue */
+			/* Release the driver */
+			if (!nolock) { 
+				dev->xmit_lock_owner = -1;
+				spin_unlock(&dev->xmit_lock);
+			} 
+			spin_lock(&dev->queue_lock);
+			q = dev->qdisc;
+		}
+
+		/* Device kicked us out :(
+		   This is possible in three cases:
+
+		   0. driver is locked
+		   1. fastroute is enabled
+		   2. device cannot determine busy state
+		      before start of transmission (f.e. dialout)
+		   3. device is buggy (ppp)
+		 */
+
+requeue:
+		q->ops->requeue(skb, q);
+		netif_schedule(dev);
+		return 1;
+	}
+	return q->q.qlen;
+}
+
+static void dev_watchdog(unsigned long arg)
+{
+	struct net_device *dev = (struct net_device *)arg;
+
+	spin_lock(&dev->xmit_lock);
+	if (dev->qdisc != &noop_qdisc) {
+		if (netif_device_present(dev) &&
+		    netif_running(dev) &&
+		    netif_carrier_ok(dev)) {
+			if (netif_queue_stopped(dev) &&
+			    (jiffies - dev->trans_start) > dev->watchdog_timeo) {
+				printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
+				dev->tx_timeout(dev);
+			}
+			if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+				dev_hold(dev);
+		}
+	}
+	spin_unlock(&dev->xmit_lock);
+
+	dev_put(dev);
+}
+
+static void dev_watchdog_init(struct net_device *dev)
+{
+	init_timer(&dev->watchdog_timer);
+	dev->watchdog_timer.data = (unsigned long)dev;
+	dev->watchdog_timer.function = dev_watchdog;
+}
+
+void __netdev_watchdog_up(struct net_device *dev)
+{
+	if (dev->tx_timeout) {
+		if (dev->watchdog_timeo <= 0)
+			dev->watchdog_timeo = 5*HZ;
+		if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+			dev_hold(dev);
+	}
+}
+
+static void dev_watchdog_up(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	__netdev_watchdog_up(dev);
+	spin_unlock_bh(&dev->xmit_lock);
+}
+
+static void dev_watchdog_down(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	if (del_timer(&dev->watchdog_timer))
+		__dev_put(dev);
+	spin_unlock_bh(&dev->xmit_lock);
+}
+
+/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
+   under all circumstances. It is difficult to invent anything faster or
+   cheaper.
+ */
+
+static int
+noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
+{
+	kfree_skb(skb);
+	return NET_XMIT_CN;
+}
+
+static struct sk_buff *
+noop_dequeue(struct Qdisc * qdisc)
+{
+	return NULL;
+}
+
+static int
+noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+	if (net_ratelimit())
+		printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name);
+	kfree_skb(skb);
+	return NET_XMIT_CN;
+}
+
+struct Qdisc_ops noop_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"noop",
+	.priv_size	=	0,
+	.enqueue	=	noop_enqueue,
+	.dequeue	=	noop_dequeue,
+	.requeue	=	noop_requeue,
+	.owner		=	THIS_MODULE,
+};
+
+struct Qdisc noop_qdisc = {
+	.enqueue	=	noop_enqueue,
+	.dequeue	=	noop_dequeue,
+	.flags		=	TCQ_F_BUILTIN,
+	.ops		=	&noop_qdisc_ops,	
+	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
+};
+
+static struct Qdisc_ops noqueue_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"noqueue",
+	.priv_size	=	0,
+	.enqueue	=	noop_enqueue,
+	.dequeue	=	noop_dequeue,
+	.requeue	=	noop_requeue,
+	.owner		=	THIS_MODULE,
+};
+
+static struct Qdisc noqueue_qdisc = {
+	.enqueue	=	NULL,
+	.dequeue	=	noop_dequeue,
+	.flags		=	TCQ_F_BUILTIN,
+	.ops		=	&noqueue_qdisc_ops,
+	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
+};
+
+
+static const u8 prio2band[TC_PRIO_MAX+1] =
+	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+
+/* 3-band FIFO queue: old style, but should be a bit faster than
+   generic prio+fifo combination.
+ */
+
+static int
+pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	list += prio2band[skb->priority&TC_PRIO_MAX];
+
+	if (list->qlen < qdisc->dev->tx_queue_len) {
+		__skb_queue_tail(list, skb);
+		qdisc->q.qlen++;
+		qdisc->bstats.bytes += skb->len;
+		qdisc->bstats.packets++;
+		return 0;
+	}
+	qdisc->qstats.drops++;
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static struct sk_buff *
+pfifo_fast_dequeue(struct Qdisc* qdisc)
+{
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+	struct sk_buff *skb;
+
+	for (prio = 0; prio < 3; prio++, list++) {
+		skb = __skb_dequeue(list);
+		if (skb) {
+			qdisc->q.qlen--;
+			return skb;
+		}
+	}
+	return NULL;
+}
+
+static int
+pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	list += prio2band[skb->priority&TC_PRIO_MAX];
+
+	__skb_queue_head(list, skb);
+	qdisc->q.qlen++;
+	qdisc->qstats.requeues++;
+	return 0;
+}
+
+static void
+pfifo_fast_reset(struct Qdisc* qdisc)
+{
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	for (prio=0; prio < 3; prio++)
+		skb_queue_purge(list+prio);
+	qdisc->q.qlen = 0;
+}
+
+static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_prio_qopt opt;
+
+	opt.bands = 3; 
+	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
+{
+	int i;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	for (i=0; i<3; i++)
+		skb_queue_head_init(list+i);
+
+	return 0;
+}
+
+static struct Qdisc_ops pfifo_fast_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"pfifo_fast",
+	.priv_size	=	3 * sizeof(struct sk_buff_head),
+	.enqueue	=	pfifo_fast_enqueue,
+	.dequeue	=	pfifo_fast_dequeue,
+	.requeue	=	pfifo_fast_requeue,
+	.init		=	pfifo_fast_init,
+	.reset		=	pfifo_fast_reset,
+	.dump		=	pfifo_fast_dump,
+	.owner		=	THIS_MODULE,
+};
+
+struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
+{
+	void *p;
+	struct Qdisc *sch;
+	int size;
+
+	/* ensure that the Qdisc and the private data are 32-byte aligned */
+	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
+	size += ops->priv_size + QDISC_ALIGN_CONST;
+
+	p = kmalloc(size, GFP_KERNEL);
+	if (!p)
+		return NULL;
+	memset(p, 0, size);
+
+	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST) 
+			       & ~QDISC_ALIGN_CONST);
+	sch->padded = (char *)sch - (char *)p;
+
+	INIT_LIST_HEAD(&sch->list);
+	skb_queue_head_init(&sch->q);
+	sch->ops = ops;
+	sch->enqueue = ops->enqueue;
+	sch->dequeue = ops->dequeue;
+	sch->dev = dev;
+	dev_hold(dev);
+	sch->stats_lock = &dev->queue_lock;
+	atomic_set(&sch->refcnt, 1);
+	if (!ops->init || ops->init(sch, NULL) == 0)
+		return sch;
+
+	dev_put(dev);
+	kfree(p);
+	return NULL;
+}
+
+/* Under dev->queue_lock and BH! */
+
+void qdisc_reset(struct Qdisc *qdisc)
+{
+	struct Qdisc_ops *ops = qdisc->ops;
+
+	if (ops->reset)
+		ops->reset(qdisc);
+}
+
+/* this is the rcu callback function to clean up a qdisc when there 
+ * are no further references to it */
+
+static void __qdisc_destroy(struct rcu_head *head)
+{
+	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
+	struct Qdisc_ops  *ops = qdisc->ops;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
+#endif
+	write_lock(&qdisc_tree_lock);
+	if (ops->reset)
+		ops->reset(qdisc);
+	if (ops->destroy)
+		ops->destroy(qdisc);
+	write_unlock(&qdisc_tree_lock);
+	module_put(ops->owner);
+
+	dev_put(qdisc->dev);
+	kfree((char *) qdisc - qdisc->padded);
+}
+
+/* Under dev->queue_lock and BH! */
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+	struct list_head cql = LIST_HEAD_INIT(cql);
+	struct Qdisc *cq, *q, *n;
+
+	if (qdisc->flags & TCQ_F_BUILTIN ||
+		!atomic_dec_and_test(&qdisc->refcnt))
+		return;
+
+	if (!list_empty(&qdisc->list)) {
+		if (qdisc->ops->cl_ops == NULL)
+			list_del(&qdisc->list);
+		else
+			list_move(&qdisc->list, &cql);
+	}
+
+	/* unlink inner qdiscs from dev->qdisc_list immediately */
+	list_for_each_entry(cq, &cql, list)
+		list_for_each_entry_safe(q, n, &qdisc->dev->qdisc_list, list)
+			if (TC_H_MAJ(q->parent) == TC_H_MAJ(cq->handle)) {
+				if (q->ops->cl_ops == NULL)
+					list_del_init(&q->list);
+				else
+					list_move_tail(&q->list, &cql);
+			}
+	list_for_each_entry_safe(cq, n, &cql, list)
+		list_del_init(&cq->list);
+
+	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
+}
+
+void dev_activate(struct net_device *dev)
+{
+	/* No queueing discipline is attached to device;
+	   create default one i.e. pfifo_fast for devices,
+	   which need queueing and noqueue_qdisc for
+	   virtual interfaces
+	 */
+
+	if (dev->qdisc_sleeping == &noop_qdisc) {
+		struct Qdisc *qdisc;
+		if (dev->tx_queue_len) {
+			qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
+			if (qdisc == NULL) {
+				printk(KERN_INFO "%s: activation failed\n", dev->name);
+				return;
+			}
+			write_lock_bh(&qdisc_tree_lock);
+			list_add_tail(&qdisc->list, &dev->qdisc_list);
+			write_unlock_bh(&qdisc_tree_lock);
+		} else {
+			qdisc =  &noqueue_qdisc;
+		}
+		write_lock_bh(&qdisc_tree_lock);
+		dev->qdisc_sleeping = qdisc;
+		write_unlock_bh(&qdisc_tree_lock);
+	}
+
+	spin_lock_bh(&dev->queue_lock);
+	rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
+	if (dev->qdisc != &noqueue_qdisc) {
+		dev->trans_start = jiffies;
+		dev_watchdog_up(dev);
+	}
+	spin_unlock_bh(&dev->queue_lock);
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+	struct Qdisc *qdisc;
+
+	spin_lock_bh(&dev->queue_lock);
+	qdisc = dev->qdisc;
+	dev->qdisc = &noop_qdisc;
+
+	qdisc_reset(qdisc);
+
+	spin_unlock_bh(&dev->queue_lock);
+
+	dev_watchdog_down(dev);
+
+	while (test_bit(__LINK_STATE_SCHED, &dev->state))
+		yield();
+
+	spin_unlock_wait(&dev->xmit_lock);
+}
+
+void dev_init_scheduler(struct net_device *dev)
+{
+	qdisc_lock_tree(dev);
+	dev->qdisc = &noop_qdisc;
+	dev->qdisc_sleeping = &noop_qdisc;
+	INIT_LIST_HEAD(&dev->qdisc_list);
+	qdisc_unlock_tree(dev);
+
+	dev_watchdog_init(dev);
+}
+
+void dev_shutdown(struct net_device *dev)
+{
+	struct Qdisc *qdisc;
+
+	qdisc_lock_tree(dev);
+	qdisc = dev->qdisc_sleeping;
+	dev->qdisc = &noop_qdisc;
+	dev->qdisc_sleeping = &noop_qdisc;
+	qdisc_destroy(qdisc);
+#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
+        if ((qdisc = dev->qdisc_ingress) != NULL) {
+		dev->qdisc_ingress = NULL;
+		qdisc_destroy(qdisc);
+        }
+#endif
+	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
+	qdisc_unlock_tree(dev);
+}
+
+EXPORT_SYMBOL(__netdev_watchdog_up);
+EXPORT_SYMBOL(noop_qdisc);
+EXPORT_SYMBOL(noop_qdisc_ops);
+EXPORT_SYMBOL(qdisc_create_dflt);
+EXPORT_SYMBOL(qdisc_destroy);
+EXPORT_SYMBOL(qdisc_reset);
+EXPORT_SYMBOL(qdisc_restart);
+EXPORT_SYMBOL(qdisc_lock_tree);
+EXPORT_SYMBOL(qdisc_unlock_tree);
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -0,0 +1,630 @@
+/*
+ * net/sched/sch_gred.c	Generic Random Early Detection queue.
+ *
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
+ *
+ *             991129: -  Bug fix with grio mode
+ *		       - a better sing. AvgQ mode with Grio(WRED)
+ *		       - A finer grained VQ dequeue based on sugestion
+ *		         from Ren Liu
+ *		       - More error checks
+ *
+ *
+ *
+ *  For all the glorious comments look at Alexey's sch_red.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+#if 1 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0 /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+struct gred_sched_data;
+struct gred_sched;
+
+struct gred_sched_data
+{
+/* Parameters */
+	u32		limit;		/* HARD maximal queue length	*/
+	u32		qth_min;	/* Min average length threshold: A scaled */
+	u32		qth_max;	/* Max average length threshold: A scaled */
+	u32      	DP;		/* the drop pramaters */
+	char		Wlog;		/* log(W)		*/
+	char		Plog;		/* random number bits	*/
+	u32		Scell_max;
+	u32		Rmask;
+	u32		bytesin;	/* bytes seen on virtualQ so far*/
+	u32		packetsin;	/* packets seen on virtualQ so far*/
+	u32		backlog;	/* bytes on the virtualQ */
+	u32		forced;	/* packets dropped for exceeding limits */
+	u32		early;	/* packets dropped as a warning */
+	u32		other;	/* packets dropped by invoking drop() */
+	u32		pdrop;	/* packets dropped because we exceeded physical queue limits */
+	char		Scell_log;
+	u8		Stab[256];
+	u8              prio;        /* the prio of this vq */
+
+/* Variables */
+	unsigned long	qave;		/* Average queue length: A scaled */
+	int		qcount;		/* Packets since last random number generation */
+	u32		qR;		/* Cached random number */
+
+	psched_time_t	qidlestart;	/* Start of idle period	*/
+};
+
+struct gred_sched
+{
+	struct gred_sched_data *tab[MAX_DPs];
+	u32 		DPs;   
+	u32 		def; 
+	u8 		initd; 
+	u8 		grio; 
+	u8 		eqp; 
+};
+
+static int
+gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	psched_time_t now;
+	struct gred_sched_data *q=NULL;
+	struct gred_sched *t= qdisc_priv(sch);
+	unsigned long	qave=0;	
+	int i=0;
+
+	if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
+		D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
+		goto do_enqueue;
+	}
+
+
+	if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
+		printk("GRED: setting to default (%d)\n ",t->def);
+		if (!(q=t->tab[t->def])) {
+			DPRINTK("GRED: setting to default FAILED! dropping!! "
+			    "(%d)\n ", t->def);
+			goto drop;
+		}
+		/* fix tc_index? --could be controvesial but needed for
+		   requeueing */
+		skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
+	}
+
+	D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
+	    "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
+	    sch->qstats.backlog);
+	/* sum up all the qaves of prios <= to ours to get the new qave*/
+	if (!t->eqp && t->grio) {
+		for (i=0;i<t->DPs;i++) {
+			if ((!t->tab[i]) || (i==q->DP))	
+				continue; 
+				
+			if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
+				qave +=t->tab[i]->qave;
+		}
+			
+	}
+
+	q->packetsin++;
+	q->bytesin+=skb->len;
+
+	if (t->eqp && t->grio) {
+		qave=0;
+		q->qave=t->tab[t->def]->qave;
+		q->qidlestart=t->tab[t->def]->qidlestart;
+	}
+
+	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
+		long us_idle;
+		PSCHED_GET_TIME(now);
+		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+		q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
+	} else {
+		if (t->eqp) {
+			q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
+		} else {
+			q->qave += q->backlog - (q->qave >> q->Wlog);
+		}
+
+	}
+	
+
+	if (t->eqp && t->grio) 
+		t->tab[t->def]->qave=q->qave;
+
+	if ((q->qave+qave) < q->qth_min) {
+		q->qcount = -1;
+enqueue:
+		if (q->backlog + skb->len <= q->limit) {
+			q->backlog += skb->len;
+do_enqueue:
+			__skb_queue_tail(&sch->q, skb);
+			sch->qstats.backlog += skb->len;
+			sch->bstats.bytes += skb->len;
+			sch->bstats.packets++;
+			return 0;
+		} else {
+			q->pdrop++;
+		}
+
+drop:
+		kfree_skb(skb);
+		sch->qstats.drops++;
+		return NET_XMIT_DROP;
+	}
+	if ((q->qave+qave) >= q->qth_max) {
+		q->qcount = -1;
+		sch->qstats.overlimits++;
+		q->forced++;
+		goto drop;
+	}
+	if (++q->qcount) {
+		if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
+			goto enqueue;
+		q->qcount = 0;
+		q->qR = net_random()&q->Rmask;
+		sch->qstats.overlimits++;
+		q->early++;
+		goto drop;
+	}
+	q->qR = net_random()&q->Rmask;
+	goto enqueue;
+}
+
+static int
+gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct gred_sched_data *q;
+	struct gred_sched *t= qdisc_priv(sch);
+	q= t->tab[(skb->tc_index&0xf)];
+/* error checking here -- probably unnecessary */
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+	__skb_queue_head(&sch->q, skb);
+	sch->qstats.backlog += skb->len;
+	sch->qstats.requeues++;
+	q->backlog += skb->len;
+	return 0;
+}
+
+static struct sk_buff *
+gred_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct gred_sched_data *q;
+	struct gred_sched *t= qdisc_priv(sch);
+
+	skb = __skb_dequeue(&sch->q);
+	if (skb) {
+		sch->qstats.backlog -= skb->len;
+		q= t->tab[(skb->tc_index&0xf)];
+		if (q) {
+			q->backlog -= skb->len;
+			if (!q->backlog && !t->eqp)
+				PSCHED_GET_TIME(q->qidlestart);
+		} else {
+			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
+		}
+		return skb;
+	}
+
+	if (t->eqp) {
+			q= t->tab[t->def];
+			if (!q)	
+				D2PRINTK("no default VQ set: Results will be "
+				       "screwed up\n");
+			else
+				PSCHED_GET_TIME(q->qidlestart);
+	}
+
+	return NULL;
+}
+
+static unsigned int gred_drop(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	struct gred_sched_data *q;
+	struct gred_sched *t= qdisc_priv(sch);
+
+	skb = __skb_dequeue_tail(&sch->q);
+	if (skb) {
+		unsigned int len = skb->len;
+		sch->qstats.backlog -= len;
+		sch->qstats.drops++;
+		q= t->tab[(skb->tc_index&0xf)];
+		if (q) {
+			q->backlog -= len;
+			q->other++;
+			if (!q->backlog && !t->eqp)
+				PSCHED_GET_TIME(q->qidlestart);
+		} else {
+			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
+		}
+
+		kfree_skb(skb);
+		return len;
+	}
+
+	q=t->tab[t->def];
+	if (!q) {
+		D2PRINTK("no default VQ set: Results might be screwed up\n");
+		return 0;
+	}
+
+	PSCHED_GET_TIME(q->qidlestart);
+	return 0;
+
+}
+
+static void gred_reset(struct Qdisc* sch)
+{
+	int i;
+	struct gred_sched_data *q;
+	struct gred_sched *t= qdisc_priv(sch);
+
+	__skb_queue_purge(&sch->q);
+
+	sch->qstats.backlog = 0;
+
+        for (i=0;i<t->DPs;i++) {
+	        q= t->tab[i];
+		if (!q)	
+			continue; 
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+		q->qave = 0;
+		q->qcount = -1;
+		q->backlog = 0;
+		q->other=0;
+		q->forced=0;
+		q->pdrop=0;
+		q->early=0;
+	}
+}
+
+static int gred_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct gred_sched *table = qdisc_priv(sch);
+	struct gred_sched_data *q;
+	struct tc_gred_qopt *ctl;
+	struct tc_gred_sopt *sopt;
+	struct rtattr *tb[TCA_GRED_STAB];
+	struct rtattr *tb2[TCA_GRED_DPS];
+	int i;
+
+	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
+		return -EINVAL;
+
+	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
+		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
+
+	    if (tb2[TCA_GRED_DPS-1] == 0) 
+			return -EINVAL;
+
+		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
+		table->DPs=sopt->DPs;   
+		table->def=sopt->def_DP; 
+		table->grio=sopt->grio; 
+		table->initd=0;
+		/* probably need to clear all the table DP entries as well */
+		return 0;
+	    }
+
+
+	if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
+		RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
+		RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
+			return -EINVAL;
+
+	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
+	if (ctl->DP > MAX_DPs-1 ) {
+		/* misbehaving is punished! Put in the default drop probability */
+		DPRINTK("\nGRED: DP %u not in  the proper range fixed. New DP "
+			"set to default at %d\n",ctl->DP,table->def);
+		ctl->DP=table->def;
+	}
+	
+	if (table->tab[ctl->DP] == NULL) {
+		table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
+					    GFP_KERNEL);
+		if (NULL == table->tab[ctl->DP])
+			return -ENOMEM;
+		memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
+	}
+	q= table->tab[ctl->DP]; 
+
+	if (table->grio) {
+		if (ctl->prio <=0) {
+			if (table->def && table->tab[table->def]) {
+				DPRINTK("\nGRED: DP %u does not have a prio"
+					"setting default to %d\n",ctl->DP,
+					table->tab[table->def]->prio);
+				q->prio=table->tab[table->def]->prio;
+			} else { 
+				DPRINTK("\nGRED: DP %u does not have a prio"
+					" setting default to 8\n",ctl->DP);
+				q->prio=8;
+			}
+		} else {
+			q->prio=ctl->prio;
+		}
+	} else {
+		q->prio=8;
+	}
+
+
+	q->DP=ctl->DP;
+	q->Wlog = ctl->Wlog;
+	q->Plog = ctl->Plog;
+	q->limit = ctl->limit;
+	q->Scell_log = ctl->Scell_log;
+	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
+	q->Scell_max = (255<<q->Scell_log);
+	q->qth_min = ctl->qth_min<<ctl->Wlog;
+	q->qth_max = ctl->qth_max<<ctl->Wlog;
+	q->qave=0;
+	q->backlog=0;
+	q->qcount = -1;
+	q->other=0;
+	q->forced=0;
+	q->pdrop=0;
+	q->early=0;
+
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+	memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
+
+	if ( table->initd && table->grio) {
+	/* this looks ugly but it's not in the fast path */
+		for (i=0;i<table->DPs;i++) {
+			if ((!table->tab[i]) || (i==q->DP) )    
+				continue; 
+			if (table->tab[i]->prio == q->prio ){
+				/* WRED mode detected */
+				table->eqp=1;
+				break;
+			}
+		}
+	}
+
+	if (!table->initd) {
+		table->initd=1;
+		/* 
+        	the first entry also goes into the default until
+        	over-written 
+		*/
+
+		if (table->tab[table->def] == NULL) {
+			table->tab[table->def]=
+				kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
+			if (NULL == table->tab[table->def])
+				return -ENOMEM;
+
+			memset(table->tab[table->def], 0,
+			       (sizeof(struct gred_sched_data)));
+		}
+		q= table->tab[table->def]; 
+		q->DP=table->def;
+		q->Wlog = ctl->Wlog;
+		q->Plog = ctl->Plog;
+		q->limit = ctl->limit;
+		q->Scell_log = ctl->Scell_log;
+		q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
+		q->Scell_max = (255<<q->Scell_log);
+		q->qth_min = ctl->qth_min<<ctl->Wlog;
+		q->qth_max = ctl->qth_max<<ctl->Wlog;
+
+		if (table->grio)
+			q->prio=table->tab[ctl->DP]->prio;
+		else
+			q->prio=8;
+
+		q->qcount = -1;
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+		memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
+	}
+	return 0;
+
+}
+
+static int gred_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct gred_sched *table = qdisc_priv(sch);
+	struct tc_gred_sopt *sopt;
+	struct rtattr *tb[TCA_GRED_STAB];
+	struct rtattr *tb2[TCA_GRED_DPS];
+
+	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
+		return -EINVAL;
+
+	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
+		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
+
+	    if (tb2[TCA_GRED_DPS-1] == 0) 
+			return -EINVAL;
+
+		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
+		table->DPs=sopt->DPs;   
+		table->def=sopt->def_DP; 
+		table->grio=sopt->grio; 
+		table->initd=0;
+		return 0;
+	}
+
+	DPRINTK("\n GRED_INIT error!\n");
+	return -EINVAL;
+}
+
+static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	unsigned long qave;
+	struct rtattr *rta;
+	struct tc_gred_qopt *opt = NULL ;
+	struct tc_gred_qopt *dst;
+	struct gred_sched *table = qdisc_priv(sch);
+	struct gred_sched_data *q;
+	int i;
+	unsigned char	 *b = skb->tail;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
+
+	if (opt  == NULL) {
+		DPRINTK("gred_dump:failed to malloc for %Zd\n",
+		    sizeof(struct tc_gred_qopt)*MAX_DPs);
+		goto rtattr_failure;
+	}
+
+	memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
+
+	if (!table->initd) {
+		DPRINTK("NO GRED Queues setup!\n");
+	}
+
+	for (i=0;i<MAX_DPs;i++) {
+		dst= &opt[i]; 
+		q= table->tab[i]; 
+
+		if (!q) {
+			/* hack -- fix at some point with proper message
+			   This is how we indicate to tc that there is no VQ
+			   at this DP */
+
+			dst->DP=MAX_DPs+i;
+			continue;
+		}
+
+		dst->limit=q->limit;
+		dst->qth_min=q->qth_min>>q->Wlog;
+		dst->qth_max=q->qth_max>>q->Wlog;
+		dst->DP=q->DP;
+		dst->backlog=q->backlog;
+		if (q->qave) {
+			if (table->eqp && table->grio) {
+				q->qidlestart=table->tab[table->def]->qidlestart;
+				q->qave=table->tab[table->def]->qave;
+			}
+			if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
+				long idle;
+				psched_time_t now;
+				PSCHED_GET_TIME(now);
+				idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
+				qave  = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
+				dst->qave = qave >> q->Wlog;
+
+			} else {
+				dst->qave = q->qave >> q->Wlog;
+			}
+		} else {
+			dst->qave = 0;
+		}
+		
+
+		dst->Wlog = q->Wlog;
+		dst->Plog = q->Plog;
+		dst->Scell_log = q->Scell_log;
+		dst->other = q->other;
+		dst->forced = q->forced;
+		dst->early = q->early;
+		dst->pdrop = q->pdrop;
+		dst->prio = q->prio;
+		dst->packets=q->packetsin;
+		dst->bytesin=q->bytesin;
+	}
+
+	RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
+	rta->rta_len = skb->tail - b;
+
+	kfree(opt);
+	return skb->len;
+
+rtattr_failure:
+	if (opt)
+		kfree(opt);
+	DPRINTK("gred_dump: FAILURE!!!!\n");
+
+/* also free the opt struct here */
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static void gred_destroy(struct Qdisc *sch)
+{
+	struct gred_sched *table = qdisc_priv(sch);
+	int i;
+
+	for (i = 0;i < table->DPs; i++) {
+		if (table->tab[i])
+			kfree(table->tab[i]);
+	}
+}
+
+static struct Qdisc_ops gred_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"gred",
+	.priv_size	=	sizeof(struct gred_sched),
+	.enqueue	=	gred_enqueue,
+	.dequeue	=	gred_dequeue,
+	.requeue	=	gred_requeue,
+	.drop		=	gred_drop,
+	.init		=	gred_init,
+	.reset		=	gred_reset,
+	.destroy	=	gred_destroy,
+	.change		=	gred_change,
+	.dump		=	gred_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init gred_module_init(void)
+{
+	return register_qdisc(&gred_qdisc_ops);
+}
+static void __exit gred_module_exit(void) 
+{
+	unregister_qdisc(&gred_qdisc_ops);
+}
+module_init(gred_module_init)
+module_exit(gred_module_exit)
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -0,0 +1,436 @@
+/* net/sched/sch_ingress.c - Ingress qdisc 
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Authors:     Jamal Hadi Salim 1999
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/smp.h>
+#include <net/pkt_sched.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+#include <linux/kmod.h>
+#include <linux/stat.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+
+
+#undef DEBUG_INGRESS
+
+#ifdef DEBUG_INGRESS  /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0  /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+
+#define PRIV(sch) qdisc_priv(sch)
+
+
+/* Thanks to Doron Oz for this hack
+*/
+#ifndef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NETFILTER
+static int nf_registered; 
+#endif
+#endif
+
+struct ingress_qdisc_data {
+	struct Qdisc		*q;
+	struct tcf_proto	*filter_list;
+};
+
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+
+static int ingress_graft(struct Qdisc *sch,unsigned long arg,
+    struct Qdisc *new,struct Qdisc **old)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+
+	DPRINTK("ingress_graft(sch %p,[qdisc %p],new %p,old %p)\n",
+		sch, p, new, old);
+	DPRINTK("\n ingress_graft: You cannot add qdiscs to classes");
+        return 1;
+}
+
+
+static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+
+static unsigned long ingress_get(struct Qdisc *sch,u32 classid)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+	DPRINTK("ingress_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+	return TC_H_MIN(classid) + 1;
+}
+
+
+static unsigned long ingress_bind_filter(struct Qdisc *sch,
+    unsigned long parent, u32 classid)
+{
+	return ingress_get(sch, classid);
+}
+
+
+static void ingress_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+
+static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
+    struct rtattr **tca, unsigned long *arg)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+	DPRINTK("ingress_change(sch %p,[qdisc %p],classid %x,parent %x),"
+		"arg 0x%lx\n", sch, p, classid, parent, *arg);
+	DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
+	return 0;
+}
+
+
+
+static void ingress_walk(struct Qdisc *sch,struct qdisc_walker *walker)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+	DPRINTK("ingress_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+	DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
+}
+
+
+static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch,unsigned long cl)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+
+	return &p->filter_list;
+}
+
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+
+static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+	struct tcf_result res;
+	int result;
+
+	D2PRINTK("ingress_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+	result = tc_classify(skb, p->filter_list, &res);
+	D2PRINTK("result %d class 0x%04x\n", result, res.classid);
+	/*
+	 * Unlike normal "enqueue" functions, ingress_enqueue returns a
+	 * firewall FW_* code.
+	 */
+#ifdef CONFIG_NET_CLS_ACT
+	sch->bstats.packets++;
+	sch->bstats.bytes += skb->len;
+	switch (result) {
+		case TC_ACT_SHOT:
+			result = TC_ACT_SHOT;
+			sch->qstats.drops++;
+			break;
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			result = TC_ACT_STOLEN;
+			break;
+		case TC_ACT_RECLASSIFY: 
+		case TC_ACT_OK:
+		case TC_ACT_UNSPEC:
+		default:
+			skb->tc_index = TC_H_MIN(res.classid);
+			result = TC_ACT_OK;
+			break;
+	};
+/* backward compat */
+#else
+#ifdef	CONFIG_NET_CLS_POLICE  
+	switch (result) {
+		case TC_POLICE_SHOT:
+		result = NF_DROP;
+		sch->qstats.drops++;
+		break;
+		case TC_POLICE_RECLASSIFY: /* DSCP remarking here ? */
+		case TC_POLICE_OK:
+		case TC_POLICE_UNSPEC:
+		default:
+		sch->bstats.packets++;
+		sch->bstats.bytes += skb->len;
+		result = NF_ACCEPT;
+		break;
+	};
+
+#else
+	D2PRINTK("Overriding result to ACCEPT\n");
+	result = NF_ACCEPT;
+	sch->bstats.packets++;
+	sch->bstats.bytes += skb->len;
+#endif
+#endif
+
+	return result;
+}
+
+
+static struct sk_buff *ingress_dequeue(struct Qdisc *sch)
+{
+/*
+	struct ingress_qdisc_data *p = PRIV(sch);
+	D2PRINTK("ingress_dequeue(sch %p,[qdisc %p])\n",sch,PRIV(p));
+*/
+	return NULL;
+}
+
+
+static int ingress_requeue(struct sk_buff *skb,struct Qdisc *sch)
+{
+/*
+	struct ingress_qdisc_data *p = PRIV(sch);
+	D2PRINTK("ingress_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,PRIV(p));
+*/
+	return 0;
+}
+
+static unsigned int ingress_drop(struct Qdisc *sch)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+	DPRINTK("ingress_drop(sch %p,[qdisc %p])\n", sch, p);
+	return 0;
+}
+
+#ifndef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NETFILTER
+static unsigned int
+ing_hook(unsigned int hook, struct sk_buff **pskb,
+                             const struct net_device *indev,
+                             const struct net_device *outdev,
+	                     int (*okfn)(struct sk_buff *))
+{
+	
+	struct Qdisc *q;
+	struct sk_buff *skb = *pskb;
+        struct net_device *dev = skb->dev;
+	int fwres=NF_ACCEPT;
+
+	DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
+		skb->sk ? "(owned)" : "(unowned)",
+		skb->dev ? (*pskb)->dev->name : "(no dev)",
+		skb->len);
+
+/* 
+revisit later: Use a private since lock dev->queue_lock is also
+used on the egress (might slow things for an iota)
+*/
+
+	if (dev->qdisc_ingress) {
+		spin_lock(&dev->queue_lock);
+		if ((q = dev->qdisc_ingress) != NULL)
+			fwres = q->enqueue(skb, q);
+		spin_unlock(&dev->queue_lock);
+        }
+			
+	return fwres;
+}
+
+/* after ipt_filter */
+static struct nf_hook_ops ing_ops = {
+	.hook           = ing_hook,
+	.owner		= THIS_MODULE,
+	.pf             = PF_INET,
+	.hooknum        = NF_IP_PRE_ROUTING,
+	.priority       = NF_IP_PRI_FILTER + 1,
+};
+
+static struct nf_hook_ops ing6_ops = {
+	.hook           = ing_hook,
+	.owner		= THIS_MODULE,
+	.pf             = PF_INET6,
+	.hooknum        = NF_IP6_PRE_ROUTING,
+	.priority       = NF_IP6_PRI_FILTER + 1,
+};
+
+#endif
+#endif
+
+static int ingress_init(struct Qdisc *sch,struct rtattr *opt)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+
+/* Make sure either netfilter or preferably CLS_ACT is
+* compiled in */
+#ifndef CONFIG_NET_CLS_ACT
+#ifndef CONFIG_NETFILTER
+	printk("You MUST compile classifier actions into the kernel\n");
+	return -EINVAL;
+#else
+	printk("Ingress scheduler: Classifier actions prefered over netfilter\n");
+#endif
+#endif
+                                                                                
+#ifndef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NETFILTER
+	if (!nf_registered) {
+		if (nf_register_hook(&ing_ops) < 0) {
+			printk("ingress qdisc registration error \n");
+			return -EINVAL;
+		}
+		nf_registered++;
+
+		if (nf_register_hook(&ing6_ops) < 0) {
+			printk("IPv6 ingress qdisc registration error, " \
+			    "disabling IPv6 support.\n");
+		} else
+			nf_registered++;
+	}
+#endif
+#endif
+
+	DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
+	p->q = &noop_qdisc;
+	return 0;
+}
+
+
+static void ingress_reset(struct Qdisc *sch)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+
+	DPRINTK("ingress_reset(sch %p,[qdisc %p])\n", sch, p);
+
+/*
+#if 0
+*/
+/* for future use */
+	qdisc_reset(p->q);
+/*
+#endif
+*/
+}
+
+/* ------------------------------------------------------------- */
+
+
+/* ------------------------------------------------------------- */
+
+static void ingress_destroy(struct Qdisc *sch)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+	struct tcf_proto *tp;
+
+	DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
+	while (p->filter_list) {
+		tp = p->filter_list;
+		p->filter_list = tp->next;
+		tcf_destroy(tp);
+	}
+#if 0
+/* for future use */
+	qdisc_destroy(p->q);
+#endif
+}
+
+
+static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	rta->rta_len = skb->tail - b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct Qdisc_class_ops ingress_class_ops = {
+	.graft		=	ingress_graft,
+	.leaf		=	ingress_leaf,
+	.get		=	ingress_get,
+	.put		=	ingress_put,
+	.change		=	ingress_change,
+	.delete		=	NULL,
+	.walk		=	ingress_walk,
+	.tcf_chain	=	ingress_find_tcf,
+	.bind_tcf	=	ingress_bind_filter,
+	.unbind_tcf	=	ingress_put,
+	.dump		=	NULL,
+};
+
+static struct Qdisc_ops ingress_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&ingress_class_ops,
+	.id		=	"ingress",
+	.priv_size	=	sizeof(struct ingress_qdisc_data),
+	.enqueue	=	ingress_enqueue,
+	.dequeue	=	ingress_dequeue,
+	.requeue	=	ingress_requeue,
+	.drop		=	ingress_drop,
+	.init		=	ingress_init,
+	.reset		=	ingress_reset,
+	.destroy	=	ingress_destroy,
+	.change		=	NULL,
+	.dump		=	ingress_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init ingress_module_init(void)
+{
+	int ret = 0;
+
+	if ((ret = register_qdisc(&ingress_qdisc_ops)) < 0) {
+		printk("Unable to register Ingress qdisc\n");
+		return ret;
+	}
+
+	return ret;
+}
+static void __exit ingress_module_exit(void) 
+{
+	unregister_qdisc(&ingress_qdisc_ops);
+#ifndef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NETFILTER
+	if (nf_registered) {
+		nf_unregister_hook(&ing_ops);
+		if (nf_registered > 1)
+			nf_unregister_hook(&ing6_ops);
+	}
+#endif
+#endif
+}
+module_init(ingress_module_init)
+module_exit(ingress_module_exit)
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -0,0 +1,598 @@
+/*
+ * net/sched/sch_netem.c	Network emulator
+ *
+ * 		This program is free software; you can redistribute it and/or
+ * 		modify it under the terms of the GNU General Public License
+ * 		as published by the Free Software Foundation; either version
+ * 		2 of the License, or (at your option) any later version.
+ *
+ *  		Many of the algorithms and ideas for this came from
+ *		NIST Net which is not copyrighted. 
+ *
+ * Authors:	Stephen Hemminger <shemminger@osdl.org>
+ *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+
+#include <net/pkt_sched.h>
+
+/*	Network Emulation Queuing algorithm.
+	====================================
+
+	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
+		 Network Emulation Tool
+		 [2] Luigi Rizzo, DummyNet for FreeBSD
+
+	 ----------------------------------------------------------------
+
+	 This started out as a simple way to delay outgoing packets to
+	 test TCP but has grown to include most of the functionality
+	 of a full blown network emulator like NISTnet. It can delay
+	 packets and add random jitter (and correlation). The random
+	 distribution can be loaded from a table as well to provide
+	 normal, Pareto, or experimental curves. Packet loss,
+	 duplication, and reordering can also be emulated.
+
+	 This qdisc does not do classification that can be handled in
+	 layering other disciplines.  It does not need to do bandwidth
+	 control either since that can be handled by using token
+	 bucket or other rate control.
+
+	 The simulator is limited by the Linux timer resolution
+	 and will create packet bursts on the HZ boundary (1ms).
+*/
+
+struct netem_sched_data {
+	struct Qdisc	*qdisc;
+	struct sk_buff_head delayed;
+	struct timer_list timer;
+
+	u32 latency;
+	u32 loss;
+	u32 limit;
+	u32 counter;
+	u32 gap;
+	u32 jitter;
+	u32 duplicate;
+
+	struct crndstate {
+		unsigned long last;
+		unsigned long rho;
+	} delay_cor, loss_cor, dup_cor;
+
+	struct disttable {
+		u32  size;
+		s16 table[0];
+	} *delay_dist;
+};
+
+/* Time stamp put into socket buffer control block */
+struct netem_skb_cb {
+	psched_time_t	time_to_send;
+};
+
+/* init_crandom - initialize correlated random number generator
+ * Use entropy source for initial seed.
+ */
+static void init_crandom(struct crndstate *state, unsigned long rho)
+{
+	state->rho = rho;
+	state->last = net_random();
+}
+
+/* get_crandom - correlated random number generator
+ * Next number depends on last value.
+ * rho is scaled to avoid floating point.
+ */
+static unsigned long get_crandom(struct crndstate *state)
+{
+	u64 value, rho;
+	unsigned long answer;
+
+	if (state->rho == 0)	/* no correllation */
+		return net_random();
+
+	value = net_random();
+	rho = (u64)state->rho + 1;
+	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
+	state->last = answer;
+	return answer;
+}
+
+/* tabledist - return a pseudo-randomly distributed value with mean mu and
+ * std deviation sigma.  Uses table lookup to approximate the desired
+ * distribution, and a uniformly-distributed pseudo-random source.
+ */
+static long tabledist(unsigned long mu, long sigma, 
+		      struct crndstate *state, const struct disttable *dist)
+{
+	long t, x;
+	unsigned long rnd;
+
+	if (sigma == 0)
+		return mu;
+
+	rnd = get_crandom(state);
+
+	/* default uniform distribution */
+	if (dist == NULL) 
+		return (rnd % (2*sigma)) - sigma + mu;
+
+	t = dist->table[rnd % dist->size];
+	x = (sigma % NETEM_DIST_SCALE) * t;
+	if (x >= 0)
+		x += NETEM_DIST_SCALE/2;
+	else
+		x -= NETEM_DIST_SCALE/2;
+
+	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
+}
+
+/* Put skb in the private delayed queue. */
+static int delay_skb(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+	psched_tdiff_t td;
+	psched_time_t now;
+	
+	PSCHED_GET_TIME(now);
+	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
+	PSCHED_TADD2(now, td, cb->time_to_send);
+	
+	/* Always queue at tail to keep packets in order */
+	if (likely(q->delayed.qlen < q->limit)) {
+		__skb_queue_tail(&q->delayed, skb);
+		if (!timer_pending(&q->timer)) {
+			q->timer.expires = jiffies + PSCHED_US2JIFFIE(td);
+			add_timer(&q->timer);
+		}
+		return NET_XMIT_SUCCESS;
+	}
+
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb2;
+	int ret;
+
+	pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies);
+
+	/* Random packet drop 0 => none, ~0 => all */
+	if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
+		pr_debug("netem_enqueue: random loss\n");
+		sch->qstats.drops++;
+		kfree_skb(skb);
+		return 0;	/* lie about loss so TCP doesn't know */
+	}
+
+	/* Random duplication */
+	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)
+	    && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
+		pr_debug("netem_enqueue: dup %p\n", skb2);
+
+		if (delay_skb(sch, skb2)) {
+			sch->q.qlen++;
+			sch->bstats.bytes += skb2->len;
+			sch->bstats.packets++;
+		} else
+			sch->qstats.drops++;
+	}
+
+	/* If doing simple delay then gap == 0 so all packets
+	 * go into the delayed holding queue
+	 * otherwise if doing out of order only "1 out of gap"
+	 * packets will be delayed.
+	 */
+	if (q->counter < q->gap) {
+		++q->counter;
+		ret = q->qdisc->enqueue(skb, q->qdisc);
+	} else {
+		q->counter = 0;
+		ret = delay_skb(sch, skb);
+	}
+
+	if (likely(ret == NET_XMIT_SUCCESS)) {
+		sch->q.qlen++;
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+	} else
+		sch->qstats.drops++;
+
+	return ret;
+}
+
+/* Requeue packets but don't change time stamp */
+static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+	}
+
+	return ret;
+}
+
+static unsigned int netem_drop(struct Qdisc* sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	unsigned int len;
+
+	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
+		sch->q.qlen--;
+		sch->qstats.drops++;
+	}
+	return len;
+}
+
+/* Dequeue packet.
+ *  Move all packets that are ready to send from the delay holding
+ *  list to the underlying qdisc, then just call dequeue
+ */
+static struct sk_buff *netem_dequeue(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = q->qdisc->dequeue(q->qdisc);
+	if (skb) 
+		sch->q.qlen--;
+	return skb;
+}
+
+static void netem_watchdog(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = sch->dev;
+	struct sk_buff *skb;
+	psched_time_t now;
+
+	pr_debug("netem_watchdog: fired @%lu\n", jiffies);
+
+	spin_lock_bh(&dev->queue_lock);
+	PSCHED_GET_TIME(now);
+
+	while ((skb = skb_peek(&q->delayed)) != NULL) {
+		const struct netem_skb_cb *cb
+			= (const struct netem_skb_cb *)skb->cb;
+		long delay 
+			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
+		pr_debug("netem_watchdog: skb %p@%lu %ld\n",
+			 skb, jiffies, delay);
+
+		/* if more time remaining? */
+		if (delay > 0) {
+			mod_timer(&q->timer, jiffies + delay);
+			break;
+		}
+		__skb_unlink(skb, &q->delayed);
+
+		if (q->qdisc->enqueue(skb, q->qdisc)) {
+			sch->q.qlen--;
+			sch->qstats.drops++;
+		}
+	}
+	qdisc_run(dev);
+	spin_unlock_bh(&dev->queue_lock);
+}
+
+static void netem_reset(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	qdisc_reset(q->qdisc);
+	skb_queue_purge(&q->delayed);
+
+	sch->q.qlen = 0;
+	del_timer_sync(&q->timer);
+}
+
+static int set_fifo_limit(struct Qdisc *q, int limit)
+{
+        struct rtattr *rta;
+	int ret = -ENOMEM;
+
+	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
+	if (rta) {
+		rta->rta_type = RTM_NEWQDISC;
+		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 
+		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
+		
+		ret = q->ops->change(q, rta);
+		kfree(rta);
+	}
+	return ret;
+}
+
+/*
+ * Distribution data is a variable size payload containing
+ * signed 16 bit values.
+ */
+static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
+	const __s16 *data = RTA_DATA(attr);
+	struct disttable *d;
+	int i;
+
+	if (n > 65536)
+		return -EINVAL;
+
+	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->size = n;
+	for (i = 0; i < n; i++)
+		d->table[i] = data[i];
+	
+	spin_lock_bh(&sch->dev->queue_lock);
+	d = xchg(&q->delay_dist, d);
+	spin_unlock_bh(&sch->dev->queue_lock);
+
+	kfree(d);
+	return 0;
+}
+
+static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct tc_netem_corr *c = RTA_DATA(attr);
+
+	if (RTA_PAYLOAD(attr) != sizeof(*c))
+		return -EINVAL;
+
+	init_crandom(&q->delay_cor, c->delay_corr);
+	init_crandom(&q->loss_cor, c->loss_corr);
+	init_crandom(&q->dup_cor, c->dup_corr);
+	return 0;
+}
+
+static int netem_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct tc_netem_qopt *qopt;
+	int ret;
+	
+	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+		return -EINVAL;
+
+	qopt = RTA_DATA(opt);
+	ret = set_fifo_limit(q->qdisc, qopt->limit);
+	if (ret) {
+		pr_debug("netem: can't set fifo limit\n");
+		return ret;
+	}
+	
+	q->latency = qopt->latency;
+	q->jitter = qopt->jitter;
+	q->limit = qopt->limit;
+	q->gap = qopt->gap;
+	q->loss = qopt->loss;
+	q->duplicate = qopt->duplicate;
+
+	/* Handle nested options after initial queue options.
+	 * Should have put all options in nested format but too late now.
+	 */ 
+	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
+		struct rtattr *tb[TCA_NETEM_MAX];
+		if (rtattr_parse(tb, TCA_NETEM_MAX, 
+				 RTA_DATA(opt) + sizeof(*qopt),
+				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
+			return -EINVAL;
+
+		if (tb[TCA_NETEM_CORR-1]) {
+			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
+			if (ret)
+				return ret;
+		}
+
+		if (tb[TCA_NETEM_DELAY_DIST-1]) {
+			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
+			if (ret)
+				return ret;
+		}
+	}
+
+
+	return 0;
+}
+
+static int netem_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if (!opt)
+		return -EINVAL;
+
+	skb_queue_head_init(&q->delayed);
+	init_timer(&q->timer);
+	q->timer.function = netem_watchdog;
+	q->timer.data = (unsigned long) sch;
+	q->counter = 0;
+
+	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+	if (!q->qdisc) {
+		pr_debug("netem: qdisc create failed\n");
+		return -ENOMEM;
+	}
+
+	ret = netem_change(sch, opt);
+	if (ret) {
+		pr_debug("netem: change failed\n");
+		qdisc_destroy(q->qdisc);
+	}
+	return ret;
+}
+
+static void netem_destroy(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	del_timer_sync(&q->timer);
+	qdisc_destroy(q->qdisc);
+	kfree(q->delay_dist);
+}
+
+static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	const struct netem_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta = (struct rtattr *) b;
+	struct tc_netem_qopt qopt;
+	struct tc_netem_corr cor;
+
+	qopt.latency = q->latency;
+	qopt.jitter = q->jitter;
+	qopt.limit = q->limit;
+	qopt.loss = q->loss;
+	qopt.gap = q->gap;
+	qopt.duplicate = q->duplicate;
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+
+	cor.delay_corr = q->delay_cor.rho;
+	cor.loss_corr = q->loss_cor.rho;
+	cor.dup_corr = q->dup_cor.rho;
+	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+	rta->rta_len = skb->tail - b;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	if (cl != 1) 	/* only one class */
+		return -ENOENT;
+
+	tcm->tcm_handle |= TC_H_MIN(1);
+	tcm->tcm_info = q->qdisc->handle;
+
+	return 0;
+}
+
+static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		     struct Qdisc **old)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = xchg(&q->qdisc, new);
+	qdisc_reset(*old);
+	sch->q.qlen = 0;
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	return q->qdisc;
+}
+
+static unsigned long netem_get(struct Qdisc *sch, u32 classid)
+{
+	return 1;
+}
+
+static void netem_put(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 
+			    struct rtattr **tca, unsigned long *arg)
+{
+	return -ENOSYS;
+}
+
+static int netem_delete(struct Qdisc *sch, unsigned long arg)
+{
+	return -ENOSYS;
+}
+
+static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+	if (!walker->stop) {
+		if (walker->count >= walker->skip)
+			if (walker->fn(sch, 1, walker) < 0) {
+				walker->stop = 1;
+				return;
+			}
+		walker->count++;
+	}
+}
+
+static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	return NULL;
+}
+
+static struct Qdisc_class_ops netem_class_ops = {
+	.graft		=	netem_graft,
+	.leaf		=	netem_leaf,
+	.get		=	netem_get,
+	.put		=	netem_put,
+	.change		=	netem_change_class,
+	.delete		=	netem_delete,
+	.walk		=	netem_walk,
+	.tcf_chain	=	netem_find_tcf,
+	.dump		=	netem_dump_class,
+};
+
+static struct Qdisc_ops netem_qdisc_ops = {
+	.id		=	"netem",
+	.cl_ops		=	&netem_class_ops,
+	.priv_size	=	sizeof(struct netem_sched_data),
+	.enqueue	=	netem_enqueue,
+	.dequeue	=	netem_dequeue,
+	.requeue	=	netem_requeue,
+	.drop		=	netem_drop,
+	.init		=	netem_init,
+	.reset		=	netem_reset,
+	.destroy	=	netem_destroy,
+	.change		=	netem_change,
+	.dump		=	netem_dump,
+	.owner		=	THIS_MODULE,
+};
+
+
+static int __init netem_module_init(void)
+{
+	return register_qdisc(&netem_qdisc_ops);
+}
+static void __exit netem_module_exit(void)
+{
+	unregister_qdisc(&netem_qdisc_ops);
+}
+module_init(netem_module_init)
+module_exit(netem_module_exit)
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -0,0 +1,444 @@
+/*
+ * net/sched/sch_prio.c	Simple 3-band priority "scheduler".
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Fixes:       19990609: J Hadi Salim <hadi@nortelnetworks.com>: 
+ *              Init --  EINVAL when opt undefined
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+struct prio_sched_data
+{
+	int bands;
+	struct tcf_proto *filter_list;
+	u8  prio2band[TC_PRIO_MAX+1];
+	struct Qdisc *queues[TCQ_PRIO_BANDS];
+};
+
+
+static struct Qdisc *
+prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	u32 band = skb->priority;
+	struct tcf_result res;
+
+	*qerr = NET_XMIT_DROP;
+	if (TC_H_MAJ(skb->priority) != sch->handle) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (tc_classify(skb, q->filter_list, &res)) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			*qerr = NET_XMIT_SUCCESS;
+		case TC_ACT_SHOT:
+			return NULL;
+		};
+
+		if (!q->filter_list ) {
+#else
+		if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) {
+#endif
+			if (TC_H_MAJ(band))
+				band = 0;
+			return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+		}
+		band = res.classid;
+	}
+	band = TC_H_MIN(band) - 1;
+	if (band > q->bands)
+		return q->queues[q->prio2band[0]];
+
+	return q->queues[band];
+}
+
+static int
+prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = prio_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+		if (ret == NET_XMIT_DROP)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) {
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+	sch->qstats.drops++;
+	return ret; 
+}
+
+
+static int
+prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = prio_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+		if (ret == NET_XMIT_DROP)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+		return 0;
+	}
+	sch->qstats.drops++;
+	return NET_XMIT_DROP;
+}
+
+
+static struct sk_buff *
+prio_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
+	struct Qdisc *qdisc;
+
+	for (prio = 0; prio < q->bands; prio++) {
+		qdisc = q->queues[prio];
+		skb = qdisc->dequeue(qdisc);
+		if (skb) {
+			sch->q.qlen--;
+			return skb;
+		}
+	}
+	return NULL;
+
+}
+
+static unsigned int prio_drop(struct Qdisc* sch)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
+	unsigned int len;
+	struct Qdisc *qdisc;
+
+	for (prio = q->bands-1; prio >= 0; prio--) {
+		qdisc = q->queues[prio];
+		if ((len = qdisc->ops->drop(qdisc)) != 0) {
+			sch->q.qlen--;
+			return len;
+		}
+	}
+	return 0;
+}
+
+
+static void
+prio_reset(struct Qdisc* sch)
+{
+	int prio;
+	struct prio_sched_data *q = qdisc_priv(sch);
+
+	for (prio=0; prio<q->bands; prio++)
+		qdisc_reset(q->queues[prio]);
+	sch->q.qlen = 0;
+}
+
+static void
+prio_destroy(struct Qdisc* sch)
+{
+	int prio;
+	struct prio_sched_data *q = qdisc_priv(sch);
+	struct tcf_proto *tp;
+
+	while ((tp = q->filter_list) != NULL) {
+		q->filter_list = tp->next;
+		tcf_destroy(tp);
+	}
+
+	for (prio=0; prio<q->bands; prio++)
+		qdisc_destroy(q->queues[prio]);
+}
+
+static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	struct tc_prio_qopt *qopt = RTA_DATA(opt);
+	int i;
+
+	if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
+		return -EINVAL;
+	if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
+		return -EINVAL;
+
+	for (i=0; i<=TC_PRIO_MAX; i++) {
+		if (qopt->priomap[i] >= qopt->bands)
+			return -EINVAL;
+	}
+
+	sch_tree_lock(sch);
+	q->bands = qopt->bands;
+	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
+
+	for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+		struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+		if (child != &noop_qdisc)
+			qdisc_destroy(child);
+	}
+	sch_tree_unlock(sch);
+
+	for (i=0; i<=TC_PRIO_MAX; i++) {
+		int band = q->prio2band[i];
+		if (q->queues[band] == &noop_qdisc) {
+			struct Qdisc *child;
+			child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+			if (child) {
+				sch_tree_lock(sch);
+				child = xchg(&q->queues[band], child);
+
+				if (child != &noop_qdisc)
+					qdisc_destroy(child);
+				sch_tree_unlock(sch);
+			}
+		}
+	}
+	return 0;
+}
+
+static int prio_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	for (i=0; i<TCQ_PRIO_BANDS; i++)
+		q->queues[i] = &noop_qdisc;
+
+	if (opt == NULL) {
+		return -EINVAL;
+	} else {
+		int err;
+
+		if ((err= prio_tune(sch, opt)) != 0)
+			return err;
+	}
+	return 0;
+}
+
+static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct tc_prio_qopt opt;
+
+	opt.bands = q->bands;
+	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		      struct Qdisc **old)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return -EINVAL;
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = q->queues[band];
+	q->queues[band] = new;
+	sch->q.qlen -= (*old)->q.qlen;
+	qdisc_reset(*old);
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *
+prio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return NULL;
+
+	return q->queues[band];
+}
+
+static unsigned long prio_get(struct Qdisc *sch, u32 classid)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	unsigned long band = TC_H_MIN(classid);
+
+	if (band - 1 >= q->bands)
+		return 0;
+	return band;
+}
+
+static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
+{
+	return prio_get(sch, classid);
+}
+
+
+static void prio_put(struct Qdisc *q, unsigned long cl)
+{
+	return;
+}
+
+static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg)
+{
+	unsigned long cl = *arg;
+	struct prio_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+static int prio_delete(struct Qdisc *sch, unsigned long cl)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+
+static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
+			   struct tcmsg *tcm)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	if (q->queues[cl-1])
+		tcm->tcm_info = q->queues[cl-1]->handle;
+	return 0;
+}
+
+static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	if (arg->stop)
+		return;
+
+	for (prio = 0; prio < q->bands; prio++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, prio+1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static struct Qdisc_class_ops prio_class_ops = {
+	.graft		=	prio_graft,
+	.leaf		=	prio_leaf,
+	.get		=	prio_get,
+	.put		=	prio_put,
+	.change		=	prio_change,
+	.delete		=	prio_delete,
+	.walk		=	prio_walk,
+	.tcf_chain	=	prio_find_tcf,
+	.bind_tcf	=	prio_bind,
+	.unbind_tcf	=	prio_put,
+	.dump		=	prio_dump_class,
+};
+
+static struct Qdisc_ops prio_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&prio_class_ops,
+	.id		=	"prio",
+	.priv_size	=	sizeof(struct prio_sched_data),
+	.enqueue	=	prio_enqueue,
+	.dequeue	=	prio_dequeue,
+	.requeue	=	prio_requeue,
+	.drop		=	prio_drop,
+	.init		=	prio_init,
+	.reset		=	prio_reset,
+	.destroy	=	prio_destroy,
+	.change		=	prio_tune,
+	.dump		=	prio_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init prio_module_init(void)
+{
+	return register_qdisc(&prio_qdisc_ops);
+}
+
+static void __exit prio_module_exit(void) 
+{
+	unregister_qdisc(&prio_qdisc_ops);
+}
+
+module_init(prio_module_init)
+module_exit(prio_module_exit)
+
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -0,0 +1,459 @@
+/*
+ * net/sched/sch_red.c	Random Early Detection queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ * J Hadi Salim <hadi@nortel.com> 980914:	computation fixes
+ * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
+ * J Hadi Salim <hadi@nortelnetworks.com> 980816:  ECN support	
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/dsfield.h>
+
+
+/*	Random Early Detection (RED) algorithm.
+	=======================================
+
+	Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
+	for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
+
+	This file codes a "divisionless" version of RED algorithm
+	as written down in Fig.17 of the paper.
+
+Short description.
+------------------
+
+	When a new packet arrives we calculate the average queue length:
+
+	avg = (1-W)*avg + W*current_queue_len,
+
+	W is the filter time constant (chosen as 2^(-Wlog)), it controls
+	the inertia of the algorithm. To allow larger bursts, W should be
+	decreased.
+
+	if (avg > th_max) -> packet marked (dropped).
+	if (avg < th_min) -> packet passes.
+	if (th_min < avg < th_max) we calculate probability:
+
+	Pb = max_P * (avg - th_min)/(th_max-th_min)
+
+	and mark (drop) packet with this probability.
+	Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
+	max_P should be small (not 1), usually 0.01..0.02 is good value.
+
+	max_P is chosen as a number, so that max_P/(th_max-th_min)
+	is a negative power of two in order arithmetics to contain
+	only shifts.
+
+
+	Parameters, settable by user:
+	-----------------------------
+
+	limit		- bytes (must be > qth_max + burst)
+
+	Hard limit on queue length, should be chosen >qth_max
+	to allow packet bursts. This parameter does not
+	affect the algorithms behaviour and can be chosen
+	arbitrarily high (well, less than ram size)
+	Really, this limit will never be reached
+	if RED works correctly.
+
+	qth_min		- bytes (should be < qth_max/2)
+	qth_max		- bytes (should be at least 2*qth_min and less limit)
+	Wlog	       	- bits (<32) log(1/W).
+	Plog	       	- bits (<32)
+
+	Plog is related to max_P by formula:
+
+	max_P = (qth_max-qth_min)/2^Plog;
+
+	F.e. if qth_max=128K and qth_min=32K, then Plog=22
+	corresponds to max_P=0.02
+
+	Scell_log
+	Stab
+
+	Lookup table for log((1-W)^(t/t_ave).
+
+
+NOTES:
+
+Upper bound on W.
+-----------------
+
+	If you want to allow bursts of L packets of size S,
+	you should choose W:
+
+	L + 1 - th_min/S < (1-(1-W)^L)/W
+
+	th_min/S = 32         th_min/S = 4
+			                       
+	log(W)	L
+	-1	33
+	-2	35
+	-3	39
+	-4	46
+	-5	57
+	-6	75
+	-7	101
+	-8	135
+	-9	190
+	etc.
+ */
+
+struct red_sched_data
+{
+/* Parameters */
+	u32		limit;		/* HARD maximal queue length	*/
+	u32		qth_min;	/* Min average length threshold: A scaled */
+	u32		qth_max;	/* Max average length threshold: A scaled */
+	u32		Rmask;
+	u32		Scell_max;
+	unsigned char	flags;
+	char		Wlog;		/* log(W)		*/
+	char		Plog;		/* random number bits	*/
+	char		Scell_log;
+	u8		Stab[256];
+
+/* Variables */
+	unsigned long	qave;		/* Average queue length: A scaled */
+	int		qcount;		/* Packets since last random number generation */
+	u32		qR;		/* Cached random number */
+
+	psched_time_t	qidlestart;	/* Start of idle period		*/
+	struct tc_red_xstats st;
+};
+
+static int red_ecn_mark(struct sk_buff *skb)
+{
+	if (skb->nh.raw + 20 > skb->tail)
+		return 0;
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		if (INET_ECN_is_not_ect(skb->nh.iph->tos))
+			return 0;
+		IP_ECN_set_ce(skb->nh.iph);
+		return 1;
+	case __constant_htons(ETH_P_IPV6):
+		if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h)))
+			return 0;
+		IP6_ECN_set_ce(skb->nh.ipv6h);
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int
+red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	psched_time_t now;
+
+	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
+		long us_idle;
+		int  shift;
+
+		PSCHED_GET_TIME(now);
+		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+/*
+   The problem: ideally, average length queue recalcultion should
+   be done over constant clock intervals. This is too expensive, so that
+   the calculation is driven by outgoing packets.
+   When the queue is idle we have to model this clock by hand.
+
+   SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
+   dummy packets as a burst after idle time, i.e.
+
+          q->qave *= (1-W)^m
+
+   This is an apparently overcomplicated solution (f.e. we have to precompute
+   a table to make this calculation in reasonable time)
+   I believe that a simpler model may be used here,
+   but it is field for experiments.
+*/
+		shift = q->Stab[us_idle>>q->Scell_log];
+
+		if (shift) {
+			q->qave >>= shift;
+		} else {
+			/* Approximate initial part of exponent
+			   with linear function:
+			   (1-W)^m ~= 1-mW + ...
+
+			   Seems, it is the best solution to
+			   problem of too coarce exponent tabulation.
+			 */
+
+			us_idle = (q->qave * us_idle)>>q->Scell_log;
+			if (us_idle < q->qave/2)
+				q->qave -= us_idle;
+			else
+				q->qave >>= 1;
+		}
+	} else {
+		q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
+		/* NOTE:
+		   q->qave is fixed point number with point at Wlog.
+		   The formulae above is equvalent to floating point
+		   version:
+
+		   qave = qave*(1-W) + sch->qstats.backlog*W;
+		                                           --ANK (980924)
+		 */
+	}
+
+	if (q->qave < q->qth_min) {
+		q->qcount = -1;
+enqueue:
+		if (sch->qstats.backlog + skb->len <= q->limit) {
+			__skb_queue_tail(&sch->q, skb);
+			sch->qstats.backlog += skb->len;
+			sch->bstats.bytes += skb->len;
+			sch->bstats.packets++;
+			return NET_XMIT_SUCCESS;
+		} else {
+			q->st.pdrop++;
+		}
+		kfree_skb(skb);
+		sch->qstats.drops++;
+		return NET_XMIT_DROP;
+	}
+	if (q->qave >= q->qth_max) {
+		q->qcount = -1;
+		sch->qstats.overlimits++;
+mark:
+		if  (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) {
+			q->st.early++;
+			goto drop;
+		}
+		q->st.marked++;
+		goto enqueue;
+	}
+
+	if (++q->qcount) {
+		/* The formula used below causes questions.
+
+		   OK. qR is random number in the interval 0..Rmask
+		   i.e. 0..(2^Plog). If we used floating point
+		   arithmetics, it would be: (2^Plog)*rnd_num,
+		   where rnd_num is less 1.
+
+		   Taking into account, that qave have fixed
+		   point at Wlog, and Plog is related to max_P by
+		   max_P = (qth_max-qth_min)/2^Plog; two lines
+		   below have the following floating point equivalent:
+		   
+		   max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount
+
+		   Any questions? --ANK (980924)
+		 */
+		if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
+			goto enqueue;
+		q->qcount = 0;
+		q->qR = net_random()&q->Rmask;
+		sch->qstats.overlimits++;
+		goto mark;
+	}
+	q->qR = net_random()&q->Rmask;
+	goto enqueue;
+
+drop:
+	kfree_skb(skb);
+	sch->qstats.drops++;
+	return NET_XMIT_CN;
+}
+
+static int
+red_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+	__skb_queue_head(&sch->q, skb);
+	sch->qstats.backlog += skb->len;
+	sch->qstats.requeues++;
+	return 0;
+}
+
+static struct sk_buff *
+red_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	skb = __skb_dequeue(&sch->q);
+	if (skb) {
+		sch->qstats.backlog -= skb->len;
+		return skb;
+	}
+	PSCHED_GET_TIME(q->qidlestart);
+	return NULL;
+}
+
+static unsigned int red_drop(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	skb = __skb_dequeue_tail(&sch->q);
+	if (skb) {
+		unsigned int len = skb->len;
+		sch->qstats.backlog -= len;
+		sch->qstats.drops++;
+		q->st.other++;
+		kfree_skb(skb);
+		return len;
+	}
+	PSCHED_GET_TIME(q->qidlestart);
+	return 0;
+}
+
+static void red_reset(struct Qdisc* sch)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	__skb_queue_purge(&sch->q);
+	sch->qstats.backlog = 0;
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+	q->qave = 0;
+	q->qcount = -1;
+}
+
+static int red_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+	struct rtattr *tb[TCA_RED_STAB];
+	struct tc_red_qopt *ctl;
+
+	if (opt == NULL ||
+	    rtattr_parse_nested(tb, TCA_RED_STAB, opt) ||
+	    tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 ||
+	    RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
+	    RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256)
+		return -EINVAL;
+
+	ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);
+
+	sch_tree_lock(sch);
+	q->flags = ctl->flags;
+	q->Wlog = ctl->Wlog;
+	q->Plog = ctl->Plog;
+	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
+	q->Scell_log = ctl->Scell_log;
+	q->Scell_max = (255<<q->Scell_log);
+	q->qth_min = ctl->qth_min<<ctl->Wlog;
+	q->qth_max = ctl->qth_max<<ctl->Wlog;
+	q->limit = ctl->limit;
+	memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);
+
+	q->qcount = -1;
+	if (skb_queue_len(&sch->q) == 0)
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static int red_init(struct Qdisc* sch, struct rtattr *opt)
+{
+	return red_change(sch, opt);
+}
+
+static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_red_qopt opt;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	opt.limit = q->limit;
+	opt.qth_min = q->qth_min>>q->Wlog;
+	opt.qth_max = q->qth_max>>q->Wlog;
+	opt.Wlog = q->Wlog;
+	opt.Plog = q->Plog;
+	opt.Scell_log = q->Scell_log;
+	opt.flags = q->flags;
+	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	rta->rta_len = skb->tail - b;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	return gnet_stats_copy_app(d, &q->st, sizeof(q->st));
+}
+
+static struct Qdisc_ops red_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"red",
+	.priv_size	=	sizeof(struct red_sched_data),
+	.enqueue	=	red_enqueue,
+	.dequeue	=	red_dequeue,
+	.requeue	=	red_requeue,
+	.drop		=	red_drop,
+	.init		=	red_init,
+	.reset		=	red_reset,
+	.change		=	red_change,
+	.dump		=	red_dump,
+	.dump_stats	=	red_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init red_module_init(void)
+{
+	return register_qdisc(&red_qdisc_ops);
+}
+static void __exit red_module_exit(void) 
+{
+	unregister_qdisc(&red_qdisc_ops);
+}
+module_init(red_module_init)
+module_exit(red_module_exit)
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -0,0 +1,497 @@
+/*
+ * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/*	Stochastic Fairness Queuing algorithm.
+	=======================================
+
+	Source:
+	Paul E. McKenney "Stochastic Fairness Queuing",
+	IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
+
+	Paul E. McKenney "Stochastic Fairness Queuing",
+	"Interworking: Research and Experience", v.2, 1991, p.113-131.
+
+
+	See also:
+	M. Shreedhar and George Varghese "Efficient Fair
+	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
+
+
+	This is not the thing that is usually called (W)FQ nowadays. 
+	It does not use any timestamp mechanism, but instead
+	processes queues in round-robin order.
+
+	ADVANTAGE:
+
+	- It is very cheap. Both CPU and memory requirements are minimal.
+
+	DRAWBACKS:
+
+	- "Stochastic" -> It is not 100% fair. 
+	When hash collisions occur, several flows are considered as one.
+
+	- "Round-robin" -> It introduces larger delays than virtual clock
+	based schemes, and should not be used for isolating interactive
+	traffic	from non-interactive. It means, that this scheduler
+	should be used as leaf of CBQ or P3, which put interactive traffic
+	to higher priority band.
+
+	We still need true WFQ for top level CSZ, but using WFQ
+	for the best effort traffic is absolutely pointless:
+	SFQ is superior for this purpose.
+
+	IMPLEMENTATION:
+	This implementation limits maximal queue length to 128;
+	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	The only goal of this restrictions was that all data
+	fit into one 4K page :-). Struct sfq_sched_data is
+	organized in anti-cache manner: all the data for a bucket
+	are scattered over different locations. This is not good,
+	but it allowed me to put it into 4K.
+
+	It is easy to increase these values, but not in flight.  */
+
+#define SFQ_DEPTH		128
+#define SFQ_HASH_DIVISOR	1024
+
+/* This type should contain at least SFQ_DEPTH*2 values */
+typedef unsigned char sfq_index;
+
+struct sfq_head
+{
+	sfq_index	next;
+	sfq_index	prev;
+};
+
+struct sfq_sched_data
+{
+/* Parameters */
+	int		perturb_period;
+	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	int		limit;
+
+/* Variables */
+	struct timer_list perturb_timer;
+	int		perturbation;
+	sfq_index	tail;		/* Index of current slot in round */
+	sfq_index	max_depth;	/* Maximal depth */
+
+	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
+	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
+	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
+	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
+	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
+	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+};
+
+static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+{
+	int pert = q->perturbation;
+
+	/* Have we any rotation primitives? If not, WHY? */
+	h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
+	h ^= h>>10;
+	return h & 0x3FF;
+}
+
+static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+{
+	u32 h, h2;
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+	{
+		struct iphdr *iph = skb->nh.iph;
+		h = iph->daddr;
+		h2 = iph->saddr^iph->protocol;
+		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+		    (iph->protocol == IPPROTO_TCP ||
+		     iph->protocol == IPPROTO_UDP ||
+		     iph->protocol == IPPROTO_ESP))
+			h2 ^= *(((u32*)iph) + iph->ihl);
+		break;
+	}
+	case __constant_htons(ETH_P_IPV6):
+	{
+		struct ipv6hdr *iph = skb->nh.ipv6h;
+		h = iph->daddr.s6_addr32[3];
+		h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
+		if (iph->nexthdr == IPPROTO_TCP ||
+		    iph->nexthdr == IPPROTO_UDP ||
+		    iph->nexthdr == IPPROTO_ESP)
+			h2 ^= *(u32*)&iph[1];
+		break;
+	}
+	default:
+		h = (u32)(unsigned long)skb->dst^skb->protocol;
+		h2 = (u32)(unsigned long)skb->sk;
+	}
+	return sfq_fold_hash(q, h, h2);
+}
+
+static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
+{
+	sfq_index p, n;
+	int d = q->qs[x].qlen + SFQ_DEPTH;
+
+	p = d;
+	n = q->dep[d].next;
+	q->dep[x].next = n;
+	q->dep[x].prev = p;
+	q->dep[p].next = q->dep[n].prev = x;
+}
+
+static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
+{
+	sfq_index p, n;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+
+	if (n == p && q->max_depth == q->qs[x].qlen + 1)
+		q->max_depth--;
+
+	sfq_link(q, x);
+}
+
+static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
+{
+	sfq_index p, n;
+	int d;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+	d = q->qs[x].qlen;
+	if (q->max_depth < d)
+		q->max_depth = d;
+
+	sfq_link(q, x);
+}
+
+static unsigned int sfq_drop(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	sfq_index d = q->max_depth;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	/* Queue is full! Find the longest slot and
+	   drop a packet from it */
+
+	if (d > 1) {
+		sfq_index x = q->dep[d+SFQ_DEPTH].next;
+		skb = q->qs[x].prev;
+		len = skb->len;
+		__skb_unlink(skb, &q->qs[x]);
+		kfree_skb(skb);
+		sfq_dec(q, x);
+		sch->q.qlen--;
+		sch->qstats.drops++;
+		return len;
+	}
+
+	if (d == 1) {
+		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+		d = q->next[q->tail];
+		q->next[q->tail] = q->next[d];
+		q->allot[q->next[d]] += q->quantum;
+		skb = q->qs[d].prev;
+		len = skb->len;
+		__skb_unlink(skb, &q->qs[d]);
+		kfree_skb(skb);
+		sfq_dec(q, d);
+		sch->q.qlen--;
+		q->ht[q->hash[d]] = SFQ_DEPTH;
+		sch->qstats.drops++;
+		return len;
+	}
+
+	return 0;
+}
+
+static int
+sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	unsigned hash = sfq_hash(q, skb);
+	sfq_index x;
+
+	x = q->ht[hash];
+	if (x == SFQ_DEPTH) {
+		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+		q->hash[x] = hash;
+	}
+	__skb_queue_tail(&q->qs[x], skb);
+	sfq_inc(q, x);
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
+			q->tail = x;
+			q->next[x] = x;
+			q->allot[x] = q->quantum;
+		} else {
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
+			q->tail = x;
+		}
+	}
+	if (++sch->q.qlen < q->limit-1) {
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+
+	sfq_drop(sch);
+	return NET_XMIT_CN;
+}
+
+static int
+sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	unsigned hash = sfq_hash(q, skb);
+	sfq_index x;
+
+	x = q->ht[hash];
+	if (x == SFQ_DEPTH) {
+		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+		q->hash[x] = hash;
+	}
+	__skb_queue_head(&q->qs[x], skb);
+	sfq_inc(q, x);
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
+			q->tail = x;
+			q->next[x] = x;
+			q->allot[x] = q->quantum;
+		} else {
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
+			q->tail = x;
+		}
+	}
+	if (++sch->q.qlen < q->limit - 1) {
+		sch->qstats.requeues++;
+		return 0;
+	}
+
+	sch->qstats.drops++;
+	sfq_drop(sch);
+	return NET_XMIT_CN;
+}
+
+
+
+
+static struct sk_buff *
+sfq_dequeue(struct Qdisc* sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	sfq_index a, old_a;
+
+	/* No active slots */
+	if (q->tail == SFQ_DEPTH)
+		return NULL;
+
+	a = old_a = q->next[q->tail];
+
+	/* Grab packet */
+	skb = __skb_dequeue(&q->qs[a]);
+	sfq_dec(q, a);
+	sch->q.qlen--;
+
+	/* Is the slot empty? */
+	if (q->qs[a].qlen == 0) {
+		q->ht[q->hash[a]] = SFQ_DEPTH;
+		a = q->next[a];
+		if (a == old_a) {
+			q->tail = SFQ_DEPTH;
+			return skb;
+		}
+		q->next[q->tail] = a;
+		q->allot[a] += q->quantum;
+	} else if ((q->allot[a] -= skb->len) <= 0) {
+		q->tail = a;
+		a = q->next[a];
+		q->allot[a] += q->quantum;
+	}
+	return skb;
+}
+
+static void
+sfq_reset(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	while ((skb = sfq_dequeue(sch)) != NULL)
+		kfree_skb(skb);
+}
+
+static void sfq_perturbation(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct sfq_sched_data *q = qdisc_priv(sch);
+
+	q->perturbation = net_random()&0x1F;
+
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+}
+
+static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+
+	if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+		return -EINVAL;
+
+	sch_tree_lock(sch);
+	q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+	q->perturb_period = ctl->perturb_period*HZ;
+	if (ctl->limit)
+		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+
+	while (sch->q.qlen >= q->limit-1)
+		sfq_drop(sch);
+
+	del_timer(&q->perturb_timer);
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	init_timer(&q->perturb_timer);
+	q->perturb_timer.data = (unsigned long)sch;
+	q->perturb_timer.function = sfq_perturbation;
+
+	for (i=0; i<SFQ_HASH_DIVISOR; i++)
+		q->ht[i] = SFQ_DEPTH;
+	for (i=0; i<SFQ_DEPTH; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
+		q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
+	}
+	q->limit = SFQ_DEPTH;
+	q->max_depth = 0;
+	q->tail = SFQ_DEPTH;
+	if (opt == NULL) {
+		q->quantum = psched_mtu(sch->dev);
+		q->perturb_period = 0;
+	} else {
+		int err = sfq_change(sch, opt);
+		if (err)
+			return err;
+	}
+	for (i=0; i<SFQ_DEPTH; i++)
+		sfq_link(q, i);
+	return 0;
+}
+
+static void sfq_destroy(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	del_timer(&q->perturb_timer);
+}
+
+static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct tc_sfq_qopt opt;
+
+	opt.quantum = q->quantum;
+	opt.perturb_period = q->perturb_period/HZ;
+
+	opt.limit = q->limit;
+	opt.divisor = SFQ_HASH_DIVISOR;
+	opt.flows = q->limit;
+
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct Qdisc_ops sfq_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"sfq",
+	.priv_size	=	sizeof(struct sfq_sched_data),
+	.enqueue	=	sfq_enqueue,
+	.dequeue	=	sfq_dequeue,
+	.requeue	=	sfq_requeue,
+	.drop		=	sfq_drop,
+	.init		=	sfq_init,
+	.reset		=	sfq_reset,
+	.destroy	=	sfq_destroy,
+	.change		=	NULL,
+	.dump		=	sfq_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init sfq_module_init(void)
+{
+	return register_qdisc(&sfq_qdisc_ops);
+}
+static void __exit sfq_module_exit(void) 
+{
+	unregister_qdisc(&sfq_qdisc_ops);
+}
+module_init(sfq_module_init)
+module_exit(sfq_module_exit)
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -0,0 +1,543 @@
+/*
+ * net/sched/sch_tbf.c	Token Bucket Filter queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
+ *						 original idea by Martin Devera
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/*	Simple Token Bucket Filter.
+	=======================================
+
+	SOURCE.
+	-------
+
+	None.
+
+	Description.
+	------------
+
+	A data flow obeys TBF with rate R and depth B, if for any
+	time interval t_i...t_f the number of transmitted bits
+	does not exceed B + R*(t_f-t_i).
+
+	Packetized version of this definition:
+	The sequence of packets of sizes s_i served at moments t_i
+	obeys TBF, if for any i<=k:
+
+	s_i+....+s_k <= B + R*(t_k - t_i)
+
+	Algorithm.
+	----------
+
+	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
+
+	N(t+delta) = min{B/R, N(t) + delta}
+
+	If the first packet in queue has length S, it may be
+	transmitted only at the time t_* when S/R <= N(t_*),
+	and in this case N(t) jumps:
+
+	N(t_* + 0) = N(t_* - 0) - S/R.
+
+
+
+	Actually, QoS requires two TBF to be applied to a data stream.
+	One of them controls steady state burst size, another
+	one with rate P (peak rate) and depth M (equal to link MTU)
+	limits bursts at a smaller time scale.
+
+	It is easy to see that P>R, and B>M. If P is infinity, this double
+	TBF is equivalent to a single one.
+
+	When TBF works in reshaping mode, latency is estimated as:
+
+	lat = max ((L-B)/R, (L-M)/P)
+
+
+	NOTES.
+	------
+
+	If TBF throttles, it starts a watchdog timer, which will wake it up
+	when it is ready to transmit.
+	Note that the minimal timer resolution is 1/HZ.
+	If no new packets arrive during this period,
+	or if the device is not awaken by EOI for some previous packet,
+	TBF can stop its activity for 1/HZ.
+
+
+	This means, that with depth B, the maximal rate is
+
+	R_crit = B*HZ
+
+	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
+
+	Note that the peak rate TBF is much more tough: with MTU 1500
+	P_crit = 150Kbytes/sec. So, if you need greater peak
+	rates, use alpha with HZ=1000 :-)
+
+	With classful TBF, limit is just kept for backwards compatibility.
+	It is passed to the default bfifo qdisc - if the inner qdisc is
+	changed the limit is not effective anymore.
+*/
+
+struct tbf_sched_data
+{
+/* Parameters */
+	u32		limit;		/* Maximal length of backlog: bytes */
+	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
+	u32		mtu;
+	u32		max_size;
+	struct qdisc_rate_table	*R_tab;
+	struct qdisc_rate_table	*P_tab;
+
+/* Variables */
+	long	tokens;			/* Current number of B tokens */
+	long	ptokens;		/* Current number of P tokens */
+	psched_time_t	t_c;		/* Time check-point */
+	struct timer_list wd_timer;	/* Watchdog timer */
+	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
+};
+
+#define L2T(q,L)   ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
+#define L2T_P(q,L) ((q)->P_tab->data[(L)>>(q)->P_tab->rate.cell_log])
+
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if (skb->len > q->max_size) {
+		sch->qstats.drops++;
+#ifdef CONFIG_NET_CLS_POLICE
+		if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
+#endif
+			kfree_skb(skb);
+
+		return NET_XMIT_DROP;
+	}
+
+	if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) {
+		sch->qstats.drops++;
+		return ret;
+	}
+
+	sch->q.qlen++;
+	sch->bstats.bytes += skb->len;
+	sch->bstats.packets++;
+	return 0;
+}
+
+static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+	}
+
+	return ret;
+}
+
+static unsigned int tbf_drop(struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	unsigned int len;
+
+	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
+		sch->q.qlen--;
+		sch->qstats.drops++;
+	}
+	return len;
+}
+
+static void tbf_watchdog(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+
+	sch->flags &= ~TCQ_F_THROTTLED;
+	netif_schedule(sch->dev);
+}
+
+static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = q->qdisc->dequeue(q->qdisc);
+
+	if (skb) {
+		psched_time_t now;
+		long toks, delay;
+		long ptoks = 0;
+		unsigned int len = skb->len;
+
+		PSCHED_GET_TIME(now);
+
+		toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer);
+
+		if (q->P_tab) {
+			ptoks = toks + q->ptokens;
+			if (ptoks > (long)q->mtu)
+				ptoks = q->mtu;
+			ptoks -= L2T_P(q, len);
+		}
+		toks += q->tokens;
+		if (toks > (long)q->buffer)
+			toks = q->buffer;
+		toks -= L2T(q, len);
+
+		if ((toks|ptoks) >= 0) {
+			q->t_c = now;
+			q->tokens = toks;
+			q->ptokens = ptoks;
+			sch->q.qlen--;
+			sch->flags &= ~TCQ_F_THROTTLED;
+			return skb;
+		}
+
+		delay = PSCHED_US2JIFFIE(max_t(long, -toks, -ptoks));
+
+		if (delay == 0)
+			delay = 1;
+
+		mod_timer(&q->wd_timer, jiffies+delay);
+
+		/* Maybe we have a shorter packet in the queue,
+		   which can be sent now. It sounds cool,
+		   but, however, this is wrong in principle.
+		   We MUST NOT reorder packets under these circumstances.
+
+		   Really, if we split the flow into independent
+		   subflows, it would be a very good solution.
+		   This is the main idea of all FQ algorithms
+		   (cf. CSZ, HPFQ, HFSC)
+		 */
+
+		if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
+			/* When requeue fails skb is dropped */
+			sch->q.qlen--;
+			sch->qstats.drops++;
+		}
+
+		sch->flags |= TCQ_F_THROTTLED;
+		sch->qstats.overlimits++;
+	}
+	return NULL;
+}
+
+static void tbf_reset(struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	qdisc_reset(q->qdisc);
+	sch->q.qlen = 0;
+	PSCHED_GET_TIME(q->t_c);
+	q->tokens = q->buffer;
+	q->ptokens = q->mtu;
+	sch->flags &= ~TCQ_F_THROTTLED;
+	del_timer(&q->wd_timer);
+}
+
+static struct Qdisc *tbf_create_dflt_qdisc(struct net_device *dev, u32 limit)
+{
+	struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops);
+        struct rtattr *rta;
+	int ret;
+
+	if (q) {
+		rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
+		if (rta) {
+			rta->rta_type = RTM_NEWQDISC;
+			rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 
+			((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
+
+			ret = q->ops->change(q, rta);
+			kfree(rta);
+
+			if (ret == 0)
+				return q;
+		}
+		qdisc_destroy(q);
+	}
+
+	return NULL;
+}
+
+static int tbf_change(struct Qdisc* sch, struct rtattr *opt)
+{
+	int err = -EINVAL;
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	struct rtattr *tb[TCA_TBF_PTAB];
+	struct tc_tbf_qopt *qopt;
+	struct qdisc_rate_table *rtab = NULL;
+	struct qdisc_rate_table *ptab = NULL;
+	struct Qdisc *child = NULL;
+	int max_size,n;
+
+	if (rtattr_parse_nested(tb, TCA_TBF_PTAB, opt) ||
+	    tb[TCA_TBF_PARMS-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt))
+		goto done;
+
+	qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]);
+	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]);
+	if (rtab == NULL)
+		goto done;
+
+	if (qopt->peakrate.rate) {
+		if (qopt->peakrate.rate > qopt->rate.rate)
+			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB-1]);
+		if (ptab == NULL)
+			goto done;
+	}
+
+	for (n = 0; n < 256; n++)
+		if (rtab->data[n] > qopt->buffer) break;
+	max_size = (n << qopt->rate.cell_log)-1;
+	if (ptab) {
+		int size;
+
+		for (n = 0; n < 256; n++)
+			if (ptab->data[n] > qopt->mtu) break;
+		size = (n << qopt->peakrate.cell_log)-1;
+		if (size < max_size) max_size = size;
+	}
+	if (max_size < 0)
+		goto done;
+
+	if (q->qdisc == &noop_qdisc) {
+		if ((child = tbf_create_dflt_qdisc(sch->dev, qopt->limit)) == NULL)
+			goto done;
+	}
+
+	sch_tree_lock(sch);
+	if (child) q->qdisc = child;
+	q->limit = qopt->limit;
+	q->mtu = qopt->mtu;
+	q->max_size = max_size;
+	q->buffer = qopt->buffer;
+	q->tokens = q->buffer;
+	q->ptokens = q->mtu;
+	rtab = xchg(&q->R_tab, rtab);
+	ptab = xchg(&q->P_tab, ptab);
+	sch_tree_unlock(sch);
+	err = 0;
+done:
+	if (rtab)
+		qdisc_put_rtab(rtab);
+	if (ptab)
+		qdisc_put_rtab(ptab);
+	return err;
+}
+
+static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	PSCHED_GET_TIME(q->t_c);
+	init_timer(&q->wd_timer);
+	q->wd_timer.function = tbf_watchdog;
+	q->wd_timer.data = (unsigned long)sch;
+
+	q->qdisc = &noop_qdisc;
+
+	return tbf_change(sch, opt);
+}
+
+static void tbf_destroy(struct Qdisc *sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	del_timer(&q->wd_timer);
+
+	if (q->P_tab)
+		qdisc_put_rtab(q->P_tab);
+	if (q->R_tab)
+		qdisc_put_rtab(q->R_tab);
+
+	qdisc_destroy(q->qdisc);
+}
+
+static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_tbf_qopt opt;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	opt.limit = q->limit;
+	opt.rate = q->R_tab->rate;
+	if (q->P_tab)
+		opt.peakrate = q->P_tab->rate;
+	else
+		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+	opt.mtu = q->mtu;
+	opt.buffer = q->buffer;
+	RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
+	rta->rta_len = skb->tail - b;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	if (cl != 1) 	/* only one class */
+		return -ENOENT;
+
+	tcm->tcm_handle |= TC_H_MIN(1);
+	tcm->tcm_info = q->qdisc->handle;
+
+	return 0;
+}
+
+static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		     struct Qdisc **old)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = xchg(&q->qdisc, new);
+	qdisc_reset(*old);
+	sch->q.qlen = 0;
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	return q->qdisc;
+}
+
+static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
+{
+	return 1;
+}
+
+static void tbf_put(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 
+			    struct rtattr **tca, unsigned long *arg)
+{
+	return -ENOSYS;
+}
+
+static int tbf_delete(struct Qdisc *sch, unsigned long arg)
+{
+	return -ENOSYS;
+}
+
+static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+	if (!walker->stop) {
+		if (walker->count >= walker->skip)
+			if (walker->fn(sch, 1, walker) < 0) {
+				walker->stop = 1;
+				return;
+			}
+		walker->count++;
+	}
+}
+
+static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	return NULL;
+}
+
+static struct Qdisc_class_ops tbf_class_ops =
+{
+	.graft		=	tbf_graft,
+	.leaf		=	tbf_leaf,
+	.get		=	tbf_get,
+	.put		=	tbf_put,
+	.change		=	tbf_change_class,
+	.delete		=	tbf_delete,
+	.walk		=	tbf_walk,
+	.tcf_chain	=	tbf_find_tcf,
+	.dump		=	tbf_dump_class,
+};
+
+static struct Qdisc_ops tbf_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&tbf_class_ops,
+	.id		=	"tbf",
+	.priv_size	=	sizeof(struct tbf_sched_data),
+	.enqueue	=	tbf_enqueue,
+	.dequeue	=	tbf_dequeue,
+	.requeue	=	tbf_requeue,
+	.drop		=	tbf_drop,
+	.init		=	tbf_init,
+	.reset		=	tbf_reset,
+	.destroy	=	tbf_destroy,
+	.change		=	tbf_change,
+	.dump		=	tbf_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init tbf_module_init(void)
+{
+	return register_qdisc(&tbf_qdisc_ops);
+}
+
+static void __exit tbf_module_exit(void)
+{
+	unregister_qdisc(&tbf_qdisc_ops);
+}
+module_init(tbf_module_init)
+module_exit(tbf_module_exit)
+MODULE_LICENSE("GPL");
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -0,0 +1,511 @@
+/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <linux/moduleparam.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/*
+   How to setup it.
+   ----------------
+
+   After loading this module you will find a new device teqlN
+   and new qdisc with the same name. To join a slave to the equalizer
+   you should just set this qdisc on a device f.e.
+
+   # tc qdisc add dev eth0 root teql0
+   # tc qdisc add dev eth1 root teql0
+
+   That's all. Full PnP 8)
+
+   Applicability.
+   --------------
+
+   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
+      signal and generate EOI events. If you want to equalize virtual devices
+      like tunnels, use a normal eql device.
+   2. This device puts no limitations on physical slave characteristics
+      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
+      Certainly, large difference in link speeds will make the resulting
+      eqalized link unusable, because of huge packet reordering.
+      I estimate an upper useful difference as ~10 times.
+   3. If the slave requires address resolution, only protocols using
+      neighbour cache (IPv4/IPv6) will work over the equalized link.
+      Other protocols are still allowed to use the slave device directly,
+      which will not break load balancing, though native slave
+      traffic will have the highest priority.  */
+
+struct teql_master
+{
+	struct Qdisc_ops qops;
+	struct net_device *dev;
+	struct Qdisc *slaves;
+	struct list_head master_list;
+	struct net_device_stats stats;
+};
+
+struct teql_sched_data
+{
+	struct Qdisc *next;
+	struct teql_master *m;
+	struct neighbour *ncache;
+	struct sk_buff_head q;
+};
+
+#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
+
+#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_BROADCAST)
+
+/* "teql*" qdisc routines */
+
+static int
+teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct net_device *dev = sch->dev;
+	struct teql_sched_data *q = qdisc_priv(sch);
+
+	__skb_queue_tail(&q->q, skb);
+	if (q->q.qlen <= dev->tx_queue_len) {
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+
+	__skb_unlink(skb, &q->q);
+	kfree_skb(skb);
+	sch->qstats.drops++;
+	return NET_XMIT_DROP;
+}
+
+static int
+teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct teql_sched_data *q = qdisc_priv(sch);
+
+	__skb_queue_head(&q->q, skb);
+	sch->qstats.requeues++;
+	return 0;
+}
+
+static struct sk_buff *
+teql_dequeue(struct Qdisc* sch)
+{
+	struct teql_sched_data *dat = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = __skb_dequeue(&dat->q);
+	if (skb == NULL) {
+		struct net_device *m = dat->m->dev->qdisc->dev;
+		if (m) {
+			dat->m->slaves = sch;
+			netif_wake_queue(m);
+		}
+	}
+	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
+	return skb;
+}
+
+static __inline__ void
+teql_neigh_release(struct neighbour *n)
+{
+	if (n)
+		neigh_release(n);
+}
+
+static void
+teql_reset(struct Qdisc* sch)
+{
+	struct teql_sched_data *dat = qdisc_priv(sch);
+
+	skb_queue_purge(&dat->q);
+	sch->q.qlen = 0;
+	teql_neigh_release(xchg(&dat->ncache, NULL));
+}
+
+static void
+teql_destroy(struct Qdisc* sch)
+{
+	struct Qdisc *q, *prev;
+	struct teql_sched_data *dat = qdisc_priv(sch);
+	struct teql_master *master = dat->m;
+
+	if ((prev = master->slaves) != NULL) {
+		do {
+			q = NEXT_SLAVE(prev);
+			if (q == sch) {
+				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
+				if (q == master->slaves) {
+					master->slaves = NEXT_SLAVE(q);
+					if (q == master->slaves) {
+						master->slaves = NULL;
+						spin_lock_bh(&master->dev->queue_lock);
+						qdisc_reset(master->dev->qdisc);
+						spin_unlock_bh(&master->dev->queue_lock);
+					}
+				}
+				skb_queue_purge(&dat->q);
+				teql_neigh_release(xchg(&dat->ncache, NULL));
+				break;
+			}
+				
+		} while ((prev = q) != master->slaves);
+	}
+}
+
+static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct net_device *dev = sch->dev;
+	struct teql_master *m = (struct teql_master*)sch->ops;
+	struct teql_sched_data *q = qdisc_priv(sch);
+
+	if (dev->hard_header_len > m->dev->hard_header_len)
+		return -EINVAL;
+
+	if (m->dev == dev)
+		return -ELOOP;
+
+	q->m = m;
+
+	skb_queue_head_init(&q->q);
+
+	if (m->slaves) {
+		if (m->dev->flags & IFF_UP) {
+			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
+			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
+			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
+			    || dev->mtu < m->dev->mtu)
+				return -EINVAL;
+		} else {
+			if (!(dev->flags&IFF_POINTOPOINT))
+				m->dev->flags &= ~IFF_POINTOPOINT;
+			if (!(dev->flags&IFF_BROADCAST))
+				m->dev->flags &= ~IFF_BROADCAST;
+			if (!(dev->flags&IFF_MULTICAST))
+				m->dev->flags &= ~IFF_MULTICAST;
+			if (dev->mtu < m->dev->mtu)
+				m->dev->mtu = dev->mtu;
+		}
+		q->next = NEXT_SLAVE(m->slaves);
+		NEXT_SLAVE(m->slaves) = sch;
+	} else {
+		q->next = sch;
+		m->slaves = sch;
+		m->dev->mtu = dev->mtu;
+		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
+	}
+	return 0;
+}
+
+/* "teql*" netdevice routines */
+
+static int
+__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
+{
+	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
+	struct neighbour *mn = skb->dst->neighbour;
+	struct neighbour *n = q->ncache;
+
+	if (mn->tbl == NULL)
+		return -EINVAL;
+	if (n && n->tbl == mn->tbl &&
+	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
+		atomic_inc(&n->refcnt);
+	} else {
+		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
+		if (IS_ERR(n))
+			return PTR_ERR(n);
+	}
+	if (neigh_event_send(n, skb_res) == 0) {
+		int err;
+		read_lock(&n->lock);
+		err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
+		read_unlock(&n->lock);
+		if (err < 0) {
+			neigh_release(n);
+			return -EINVAL;
+		}
+		teql_neigh_release(xchg(&q->ncache, n));
+		return 0;
+	}
+	neigh_release(n);
+	return (skb_res == NULL) ? -EAGAIN : 1;
+}
+
+static __inline__ int
+teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
+{
+	if (dev->hard_header == NULL ||
+	    skb->dst == NULL ||
+	    skb->dst->neighbour == NULL)
+		return 0;
+	return __teql_resolve(skb, skb_res, dev);
+}
+
+static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct teql_master *master = (void*)dev->priv;
+	struct Qdisc *start, *q;
+	int busy;
+	int nores;
+	int len = skb->len;
+	struct sk_buff *skb_res = NULL;
+
+	start = master->slaves;
+
+restart:
+	nores = 0;
+	busy = 0;
+
+	if ((q = start) == NULL)
+		goto drop;
+
+	do {
+		struct net_device *slave = q->dev;
+		
+		if (slave->qdisc_sleeping != q)
+			continue;
+		if (netif_queue_stopped(slave) || ! netif_running(slave)) {
+			busy = 1;
+			continue;
+		}
+
+		switch (teql_resolve(skb, skb_res, slave)) {
+		case 0:
+			if (spin_trylock(&slave->xmit_lock)) {
+				slave->xmit_lock_owner = smp_processor_id();
+				if (!netif_queue_stopped(slave) &&
+				    slave->hard_start_xmit(skb, slave) == 0) {
+					slave->xmit_lock_owner = -1;
+					spin_unlock(&slave->xmit_lock);
+					master->slaves = NEXT_SLAVE(q);
+					netif_wake_queue(dev);
+					master->stats.tx_packets++;
+					master->stats.tx_bytes += len;
+					return 0;
+				}
+				slave->xmit_lock_owner = -1;
+				spin_unlock(&slave->xmit_lock);
+			}
+			if (netif_queue_stopped(dev))
+				busy = 1;
+			break;
+		case 1:
+			master->slaves = NEXT_SLAVE(q);
+			return 0;
+		default:
+			nores = 1;
+			break;
+		}
+		__skb_pull(skb, skb->nh.raw - skb->data);
+	} while ((q = NEXT_SLAVE(q)) != start);
+
+	if (nores && skb_res == NULL) {
+		skb_res = skb;
+		goto restart;
+	}
+
+	if (busy) {
+		netif_stop_queue(dev);
+		return 1;
+	}
+	master->stats.tx_errors++;
+
+drop:
+	master->stats.tx_dropped++;
+	dev_kfree_skb(skb);
+	return 0;
+}
+
+static int teql_master_open(struct net_device *dev)
+{
+	struct Qdisc * q;
+	struct teql_master *m = (void*)dev->priv;
+	int mtu = 0xFFFE;
+	unsigned flags = IFF_NOARP|IFF_MULTICAST;
+
+	if (m->slaves == NULL)
+		return -EUNATCH;
+
+	flags = FMASK;
+
+	q = m->slaves;
+	do {
+		struct net_device *slave = q->dev;
+
+		if (slave == NULL)
+			return -EUNATCH;
+
+		if (slave->mtu < mtu)
+			mtu = slave->mtu;
+		if (slave->hard_header_len > LL_MAX_HEADER)
+			return -EINVAL;
+
+		/* If all the slaves are BROADCAST, master is BROADCAST
+		   If all the slaves are PtP, master is PtP
+		   Otherwise, master is NBMA.
+		 */
+		if (!(slave->flags&IFF_POINTOPOINT))
+			flags &= ~IFF_POINTOPOINT;
+		if (!(slave->flags&IFF_BROADCAST))
+			flags &= ~IFF_BROADCAST;
+		if (!(slave->flags&IFF_MULTICAST))
+			flags &= ~IFF_MULTICAST;
+	} while ((q = NEXT_SLAVE(q)) != m->slaves);
+
+	m->dev->mtu = mtu;
+	m->dev->flags = (m->dev->flags&~FMASK) | flags;
+	netif_start_queue(m->dev);
+	return 0;
+}
+
+static int teql_master_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+
+static struct net_device_stats *teql_master_stats(struct net_device *dev)
+{
+	struct teql_master *m = (void*)dev->priv;
+	return &m->stats;
+}
+
+static int teql_master_mtu(struct net_device *dev, int new_mtu)
+{
+	struct teql_master *m = (void*)dev->priv;
+	struct Qdisc *q;
+
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	q = m->slaves;
+	if (q) {
+		do {
+			if (new_mtu > q->dev->mtu)
+				return -EINVAL;
+		} while ((q=NEXT_SLAVE(q)) != m->slaves);
+	}
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static __init void teql_master_setup(struct net_device *dev)
+{
+	struct teql_master *master = dev->priv;
+	struct Qdisc_ops *ops = &master->qops;
+
+	master->dev	= dev;
+	ops->priv_size  = sizeof(struct teql_sched_data);
+	
+	ops->enqueue	=	teql_enqueue;
+	ops->dequeue	=	teql_dequeue;
+	ops->requeue	=	teql_requeue;
+	ops->init	=	teql_qdisc_init;
+	ops->reset	=	teql_reset;
+	ops->destroy	=	teql_destroy;
+	ops->owner	=	THIS_MODULE;
+
+	dev->open		= teql_master_open;
+	dev->hard_start_xmit	= teql_master_xmit;
+	dev->stop		= teql_master_close;
+	dev->get_stats		= teql_master_stats;
+	dev->change_mtu		= teql_master_mtu;
+	dev->type		= ARPHRD_VOID;
+	dev->mtu		= 1500;
+	dev->tx_queue_len	= 100;
+	dev->flags		= IFF_NOARP;
+	dev->hard_header_len	= LL_MAX_HEADER;
+	SET_MODULE_OWNER(dev);
+}
+
+static LIST_HEAD(master_dev_list);
+static int max_equalizers = 1;
+module_param(max_equalizers, int, 0);
+MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
+
+static int __init teql_init(void)
+{
+	int i;
+	int err = -ENODEV;
+
+	for (i = 0; i < max_equalizers; i++) {
+		struct net_device *dev;
+		struct teql_master *master;
+
+		dev = alloc_netdev(sizeof(struct teql_master),
+				  "teql%d", teql_master_setup);
+		if (!dev) {
+			err = -ENOMEM;
+			break;
+		}
+
+		if ((err = register_netdev(dev))) {
+			free_netdev(dev);
+			break;
+		}
+
+		master = dev->priv;
+
+		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
+		err = register_qdisc(&master->qops);
+
+		if (err) {
+			unregister_netdev(dev);
+			free_netdev(dev);
+			break;
+		}
+
+		list_add_tail(&master->master_list, &master_dev_list);
+	}
+	return i ? 0 : err;
+}
+
+static void __exit teql_exit(void) 
+{
+	struct teql_master *master, *nxt;
+
+	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
+
+		list_del(&master->master_list);
+
+		unregister_qdisc(&master->qops);
+		unregister_netdev(master->dev);
+		free_netdev(master->dev);
+	}
+}
+
+module_init(teql_init);
+module_exit(teql_exit);
+
+MODULE_LICENSE("GPL");