kernel.spec -> kernel-2.6.spec
[linux-2.6.git] / linux-2.6-250-ipsets.patch
1 diff --git a/include/linux/netfilter_ipv4/ip_set.h b/include/linux/netfilter_ipv4/ip_set.h
2 new file mode 100644
3 index 0000000..92a746e
4 --- /dev/null
5 +++ b/include/linux/netfilter_ipv4/ip_set.h
6 @@ -0,0 +1,498 @@
7 +#ifndef _IP_SET_H
8 +#define _IP_SET_H
9 +
10 +/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
11 + *                         Patrick Schaaf <bof@bof.de>
12 + *                         Martin Josefsson <gandalf@wlug.westbo.se>
13 + * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
14 + *
15 + * This program is free software; you can redistribute it and/or modify
16 + * it under the terms of the GNU General Public License version 2 as
17 + * published by the Free Software Foundation.  
18 + */
19 +
20 +#if 0
21 +#define IP_SET_DEBUG
22 +#endif
23 +
24 +/*
25 + * A sockopt of such quality has hardly ever been seen before on the open
26 + * market!  This little beauty, hardly ever used: above 64, so it's
27 + * traditionally used for firewalling, not touched (even once!) by the
28 + * 2.0, 2.2 and 2.4 kernels!
29 + *
30 + * Comes with its own certificate of authenticity, valid anywhere in the
31 + * Free world!
32 + *
33 + * Rusty, 19.4.2000
34 + */
35 +#define SO_IP_SET              83
36 +
37 +/*
38 + * Heavily modified by Joakim Axelsson 08.03.2002
39 + * - Made it more modulebased
40 + *
41 + * Additional heavy modifications by Jozsef Kadlecsik 22.02.2004
42 + * - bindings added
43 + * - in order to "deal with" backward compatibility, renamed to ipset
44 + */
45 +
46 +/* 
47 + * Used so that the kernel module and ipset-binary can match their versions 
48 + */
49 +#define IP_SET_PROTOCOL_VERSION 2
50 +
51 +#define IP_SET_MAXNAMELEN 32   /* set names and set typenames */
52 +
53 +/* Lets work with our own typedef for representing an IP address.
54 + * We hope to make the code more portable, possibly to IPv6...
55 + *
56 + * The representation works in HOST byte order, because most set types
57 + * will perform arithmetic operations and compare operations.
58 + * 
59 + * For now the type is an uint32_t.
60 + *
61 + * Make sure to ONLY use the functions when translating and parsing
62 + * in order to keep the host byte order and make it more portable:
63 + *  parse_ip()
64 + *  parse_mask()
65 + *  parse_ipandmask()
66 + *  ip_tostring()
67 + * (Joakim: where are they???)
68 + */
69 +
70 +typedef uint32_t ip_set_ip_t;
71 +
72 +/* Sets are identified by an id in kernel space. Tweak with ip_set_id_t
73 + * and IP_SET_INVALID_ID if you want to increase the max number of sets.
74 + */
75 +typedef uint16_t ip_set_id_t;
76 +
77 +#define IP_SET_INVALID_ID      65535
78 +
79 +/* How deep we follow bindings */
80 +#define IP_SET_MAX_BINDINGS    6
81 +
82 +/*
83 + * Option flags for kernel operations (ipt_set_info)
84 + */
85 +#define IPSET_SRC              0x01    /* Source match/add */
86 +#define IPSET_DST              0x02    /* Destination match/add */
87 +#define IPSET_MATCH_INV                0x04    /* Inverse matching */
88 +
89 +/*
90 + * Set features
91 + */
92 +#define IPSET_TYPE_IP          0x01    /* IP address type of set */
93 +#define IPSET_TYPE_PORT                0x02    /* Port type of set */
94 +#define IPSET_DATA_SINGLE      0x04    /* Single data storage */
95 +#define IPSET_DATA_DOUBLE      0x08    /* Double data storage */
96 +
97 +/* Reserved keywords */
98 +#define IPSET_TOKEN_DEFAULT    ":default:"
99 +#define IPSET_TOKEN_ALL                ":all:"
100 +
101 +/* SO_IP_SET operation constants, and their request struct types.
102 + *
103 + * Operation ids:
104 + *       0-99:  commands with version checking
105 + *     100-199: add/del/test/bind/unbind
106 + *     200-299: list, save, restore
107 + */
108 +
109 +/* Single shot operations: 
110 + * version, create, destroy, flush, rename and swap 
111 + *
112 + * Sets are identified by name.
113 + */
114 +
115 +#define IP_SET_REQ_STD         \
116 +       unsigned op;            \
117 +       unsigned version;       \
118 +       char name[IP_SET_MAXNAMELEN]
119 +
120 +#define IP_SET_OP_CREATE       0x00000001      /* Create a new (empty) set */
121 +struct ip_set_req_create {     /* request for IP_SET_OP_CREATE; RENAME and SWAP reuse it */
122 +       IP_SET_REQ_STD;         /* expands to: op, version, name[] */
123 +       char typename[IP_SET_MAXNAMELEN];
124 +};
125 +
126 +#define IP_SET_OP_DESTROY      0x00000002      /* Remove a (empty) set */
127 +struct ip_set_req_std {        /* name-only request: IP_SET_OP_DESTROY and IP_SET_OP_FLUSH */
128 +       IP_SET_REQ_STD;         /* expands to: op, version, name[] */
129 +};
130 +
131 +#define IP_SET_OP_FLUSH                0x00000003      /* Remove all IPs in a set */
132 +/* Uses ip_set_req_std */
133 +
134 +#define IP_SET_OP_RENAME       0x00000004      /* Rename a set */
135 +/* Uses ip_set_req_create */
136 +
137 +#define IP_SET_OP_SWAP         0x00000005      /* Swap two sets */
138 +/* Uses ip_set_req_create */
139 +
140 +union ip_set_name_index {      /* a set given either by name or by index; both share the same storage */
141 +       char name[IP_SET_MAXNAMELEN];
142 +       ip_set_id_t index;
143 +};
144 +
145 +#define IP_SET_OP_GET_BYNAME   0x00000006      /* Get set index by name */
146 +struct ip_set_req_get_set {    /* request for IP_SET_OP_GET_BYNAME and IP_SET_OP_GET_BYINDEX */
147 +       unsigned op;
148 +       unsigned version;
149 +       union ip_set_name_index set;    /* name or index; which one is valid presumably depends on op - TODO confirm */
150 +};
151 +
152 +#define IP_SET_OP_GET_BYINDEX  0x00000007      /* Get set name by index */
153 +/* Uses ip_set_req_get_set */
154 +
155 +#define IP_SET_OP_VERSION      0x00000100      /* Ask kernel version */
156 +struct ip_set_req_version {
157 +       unsigned op;
158 +       unsigned version;
159 +};
160 +
161 +/* Double shots operations: 
162 + * add, del, test, bind and unbind.
163 + *
164 + * First we query the kernel to get the index and type of the target set,
165 + * then issue the command. Validity of IP is checked in kernel in order
166 + * to minimize sockopt operations.
167 + */
168 +
169 +/* Get minimal set data for add/del/test/bind/unbind IP */
170 +#define IP_SET_OP_ADT_GET      0x00000010      /* Get set and type */
171 +struct ip_set_req_adt_get {
172 +       unsigned op;
173 +       unsigned version;
174 +       union ip_set_name_index set;
175 +       char typename[IP_SET_MAXNAMELEN];
176 +};
177 +
178 +#define IP_SET_REQ_BYINDEX     /* common head of by-index requests: op + set index */ \
179 +       unsigned op;            \
180 +       ip_set_id_t index;      /* NOTE: unlike IP_SET_REQ_STD the expansion ends in ';', so writing IP_SET_REQ_BYINDEX; leaves an extra empty declaration */
181 +
182 +struct ip_set_req_adt {
183 +       IP_SET_REQ_BYINDEX;
184 +};
185 +
186 +#define IP_SET_OP_ADD_IP       0x00000101      /* Add an IP to a set */
187 +/* Uses ip_set_req_adt, with type specific addage */
188 +
189 +#define IP_SET_OP_DEL_IP       0x00000102      /* Remove an IP from a set */
190 +/* Uses ip_set_req_adt, with type specific addage */
191 +
192 +#define IP_SET_OP_TEST_IP      0x00000103      /* Test an IP in a set */
193 +/* Uses ip_set_req_adt, with type specific addage */
194 +
195 +#define IP_SET_OP_BIND_SET     0x00000104      /* Bind an IP to a set */
196 +/* Uses ip_set_req_bind, with type specific addage */
197 +struct ip_set_req_bind {
198 +       IP_SET_REQ_BYINDEX;
199 +       char binding[IP_SET_MAXNAMELEN];
200 +};
201 +
202 +#define IP_SET_OP_UNBIND_SET   0x00000105      /* Unbind an IP from a set */
203 +/* Uses ip_set_req_bind, with type specific addage 
204 + * index = 0 means unbinding for all sets */
205 +
206 +#define IP_SET_OP_TEST_BIND_SET        0x00000106      /* Test binding an IP to a set */
207 +/* Uses ip_set_req_bind, with type specific addage */
208 +
209 +/* Multiple shots operations: list, save, restore.
210 + *
211 + * - check kernel version and query the max number of sets
212 + * - get the basic information on all sets
213 + *   and size required for the next step
214 + * - get actual set data: header, data, bindings
215 + */
216 +
217 +/* Get max_sets and the index of a queried set
218 + */
219 +#define IP_SET_OP_MAX_SETS     0x00000020
220 +struct ip_set_req_max_sets {
221 +       unsigned op;
222 +       unsigned version;
223 +       ip_set_id_t max_sets;           /* max_sets */
224 +       ip_set_id_t sets;               /* real number of sets */
225 +       union ip_set_name_index set;    /* index of set if name used */
226 +};
227 +
228 +/* Get the id and name of the sets plus size for next step */
229 +#define IP_SET_OP_LIST_SIZE    0x00000201
230 +#define IP_SET_OP_SAVE_SIZE    0x00000202
231 +struct ip_set_req_setnames {
232 +       unsigned op;
233 +       ip_set_id_t index;              /* set to list/save */
234 +       size_t size;                    /* size to get setdata/bindings */
235 +       /* followed by sets number of struct ip_set_name_list */
236 +};
237 +
238 +struct ip_set_name_list {
239 +       char name[IP_SET_MAXNAMELEN];
240 +       char typename[IP_SET_MAXNAMELEN];
241 +       ip_set_id_t index;
242 +       ip_set_id_t id;
243 +};
244 +
245 +/* The actual list operation */
246 +#define IP_SET_OP_LIST         0x00000203
247 +struct ip_set_req_list {
248 +       IP_SET_REQ_BYINDEX;
249 +       /* sets number of struct ip_set_list in reply */ 
250 +};
251 +
252 +struct ip_set_list {
253 +       ip_set_id_t index;
254 +       ip_set_id_t binding;
255 +       u_int32_t ref;
256 +       size_t header_size;     /* Set header data of header_size */
257 +       size_t members_size;    /* Set members data of members_size */
258 +       size_t bindings_size;   /* Set bindings data of bindings_size */
259 +};
260 +
261 +struct ip_set_hash_list {
262 +       ip_set_ip_t ip;
263 +       ip_set_id_t binding;
264 +};
265 +
266 +/* The save operation */
267 +#define IP_SET_OP_SAVE         0x00000204
268 +/* Uses ip_set_req_list, in the reply replaced by
269 + * sets number of struct ip_set_save plus a marker
270 + * ip_set_save followed by ip_set_hash_save structures.
271 + */
272 +struct ip_set_save {
273 +       ip_set_id_t index;
274 +       ip_set_id_t binding;
275 +       size_t header_size;     /* Set header data of header_size */
276 +       size_t members_size;    /* Set members data of members_size */
277 +};
278 +
279 +/* At restoring, ip == 0 means default binding for the given set: */
280 +struct ip_set_hash_save {
281 +       ip_set_ip_t ip;
282 +       ip_set_id_t id;
283 +       ip_set_id_t binding;
284 +};
285 +
286 +/* The restore operation */
287 +#define IP_SET_OP_RESTORE      0x00000205
288 +/* Uses ip_set_req_setnames followed by ip_set_restore structures
289 + * plus a marker ip_set_restore, followed by ip_set_hash_save 
290 + * structures.
291 + */
292 +struct ip_set_restore {
293 +       char name[IP_SET_MAXNAMELEN];
294 +       char typename[IP_SET_MAXNAMELEN];
295 +       ip_set_id_t index;
296 +       size_t header_size;     /* Create data of header_size */
297 +       size_t members_size;    /* Set members data of members_size */
298 +};
299 +
300 +static inline int bitmap_bytes(ip_set_ip_t a, ip_set_ip_t b)
301 +{      /* bytes for a bitmap holding one bit per value in a..b, rounded up to a multiple of 4 */
302 +       return (((b - a + 8) / 8 + 3) / 4) * 4;
303 +}
304 +
305 +#ifdef __KERNEL__
306 +
307 +#define ip_set_printk(format, args...) /* logs: file: function: message, then a newline */ \
308 +       do {                                                    \
309 +               printk("%s: %s: ", __FILE__, __FUNCTION__);     /* prefix goes out in its own printk() call */ \
310 +               printk(format "\n" , ## args);                  /* format is pasted to a literal, so it must be a string literal */ \
311 +       } while (0)
312 +
313 +#if defined(IP_SET_DEBUG) /* debug build: DP() and IP_SET_ASSERT() report through printk() */
314 +#define DP(format, args...)    /* like ip_set_printk(), with a (DBG) tag in the prefix */ \
315 +       do {                                                    \
316 +               printk("%s: %s (DBG): ", __FILE__, __FUNCTION__);\
317 +               printk(format "\n" , ## args);                  \
318 +       } while (0)
319 +#define IP_SET_ASSERT(x)       /* only logs file:line(function) when x is false; execution continues */ \
320 +       do {                                                    \
321 +               if (!(x))                                       \
322 +                       printk("IP_SET_ASSERT: %s:%i(%s)\n",    \
323 +                               __FILE__, __LINE__, __FUNCTION__); \
324 +       } while (0)
325 +#else /* no IP_SET_DEBUG: both macros expand to nothing and their arguments are never evaluated */
326 +#define DP(format, args...)
327 +#define IP_SET_ASSERT(x)
328 +#endif
329 +
330 +struct ip_set;
331 +
332 +/*
333 + * The ip_set_type definition - one per set type, e.g. "ipmap".
334 + *
335 + * Each individual set has a pointer, set->type, going to one
336 + * of these structures. Function pointers inside the structure implement
337 + * the real behaviour of the sets.
338 + *
339 + * If not mentioned differently, the implementation behind the function
340 + * pointers of a set_type, is expected to return 0 if ok, and a negative
341 + * errno (e.g. -EINVAL) on error.
342 + */
343 +struct ip_set_type {
344 +       struct list_head list;  /* next in list of set types */
345 +
346 +       /* Test for IP in set (kernel: iptables -m set src|dst).
347 +        * Return 0 if not in set, 1 if in set.
348 +        */
349 +       int (*testip_kernel) (struct ip_set *set,
350 +                             const struct sk_buff * skb, 
351 +                             ip_set_ip_t *ip,
352 +                             const u_int32_t *flags,
353 +                             unsigned char index);
354 +
355 +       /* Test for IP in set (userspace: ipset -T set IP).
356 +        * Return 0 if not in set, 1 if in set.
357 +        */
358 +       int (*testip) (struct ip_set *set,
359 +                      const void *data, size_t size,
360 +                      ip_set_ip_t *ip);
361 +
362 +       /*
363 +        * Size of the data structure passed in when
364 +        * adding/deleting/testing an entry.
365 +        */
366 +       size_t reqsize;
367 +
368 +       /* Add IP into set (userspace: ipset -A set IP)
369 +        * Return -EEXIST if the address is already in the set,
370 +        * and -ERANGE if the address lies outside the set bounds.
371 +        * If the address was not already in the set, 0 is returned.
372 +        */
373 +       int (*addip) (struct ip_set *set, 
374 +                     const void *data, size_t size,
375 +                     ip_set_ip_t *ip);
376 +
377 +       /* Add IP into set (kernel: iptables ... -j SET set src|dst)
378 +        * Return -EEXIST if the address is already in the set,
379 +        * and -ERANGE if the address lies outside the set bounds.
380 +        * If the address was not already in the set, 0 is returned.
381 +        */
382 +       int (*addip_kernel) (struct ip_set *set,
383 +                            const struct sk_buff * skb, 
384 +                            ip_set_ip_t *ip,
385 +                            const u_int32_t *flags,
386 +                            unsigned char index);
387 +
388 +       /* Remove IP from set (userspace: ipset -D set --entry x)
389 +        * Return -EEXIST if the address is NOT in the set,
390 +        * and -ERANGE if the address lies outside the set bounds.
391 +        * If the address really was in the set, 0 is returned.
392 +        */
393 +       int (*delip) (struct ip_set *set, 
394 +                     const void *data, size_t size,
395 +                     ip_set_ip_t *ip);
396 +
397 +       /* Remove IP from set (kernel: iptables ... -j SET --entry x)
398 +        * Return -EEXIST if the address is NOT in the set,
399 +        * and -ERANGE if the address lies outside the set bounds.
400 +        * If the address really was in the set, 0 is returned.
401 +        */
402 +       int (*delip_kernel) (struct ip_set *set,
403 +                            const struct sk_buff * skb, 
404 +                            ip_set_ip_t *ip,
405 +                            const u_int32_t *flags,
406 +                            unsigned char index);
407 +
408 +       /* New set creation - allocate type specific items.
409 +        */
410 +       int (*create) (struct ip_set *set,
411 +                      const void *data, size_t size);
412 +
413 +       /* Retry the operation after successfully tweaking the set.
414 +        */
415 +       int (*retry) (struct ip_set *set);
416 +
417 +       /* Set destruction - free type specific items.
418 +        * There is no return value.
419 +        * Can be called only when child sets are destroyed.
420 +        */
421 +       void (*destroy) (struct ip_set *set);
422 +
423 +       /* Set flushing - reset all bits in the set, or something similar.
424 +        * There is no return value.
425 +        */
426 +       void (*flush) (struct ip_set *set);
427 +
428 +       /* Listing: size needed for header
429 +        */
430 +       size_t header_size;
431 +
432 +       /* Listing: Get the header
433 +        *
434 +        * Fill in the information in "data".
435 +        * This function is always run after list_header_size() under a 
436 +        * writelock on the set. Therefore the length of "data" is always 
437 +        * correct. 
438 +        */
439 +       void (*list_header) (const struct ip_set *set, 
440 +                            void *data);
441 +
442 +       /* Listing: Get the size for the set members
443 +        */
444 +       int (*list_members_size) (const struct ip_set *set);
445 +
446 +       /* Listing: Get the set members
447 +        *
448 +        * Fill in the information in "data".
449 +        * This function is always run after list_members_size() under a 
450 +        * writelock on the set. Therefore the length of "data" is always 
451 +        * correct. 
452 +        */
453 +       void (*list_members) (const struct ip_set *set,
454 +                             void *data);
455 +
456 +       char typename[IP_SET_MAXNAMELEN];
457 +       unsigned char features;         /* presumably a mask of the IPSET_TYPE_* / IPSET_DATA_* bits - TODO confirm */
458 +       int protocol_version;           /* presumably checked against IP_SET_PROTOCOL_VERSION - TODO confirm */
459 +
460 +       /* Set this to THIS_MODULE if you are a module, otherwise NULL */
461 +       struct module *me;
462 +};
463 +
464 +extern int ip_set_register_set_type(struct ip_set_type *set_type);
465 +extern void ip_set_unregister_set_type(struct ip_set_type *set_type);
466 +
467 +/* A generic ipset */
468 +struct ip_set {
469 +       char name[IP_SET_MAXNAMELEN];   /* the name of the set */
470 +       rwlock_t lock;                  /* lock for concurrency control */
471 +       ip_set_id_t id;                 /* set id for swapping */
472 +       ip_set_id_t binding;            /* default binding for the set */
473 +       atomic_t ref;                   /* in kernel and in hash references */
474 +       struct ip_set_type *type;       /* the set types */
475 +       void *data;                     /* pooltype specific data */
476 +};
477 +
478 +/* Structure to bind set elements to sets */
479 +struct ip_set_hash {
480 +       struct list_head list;          /* list of clashing entries in hash */
481 +       ip_set_ip_t ip;                 /* ip from set */
482 +       ip_set_id_t id;                 /* set id */
483 +       ip_set_id_t binding;            /* set we bind the element to */
484 +};
485 +
486 +/* register and unregister set references */
487 +extern ip_set_id_t ip_set_get_byname(const char name[IP_SET_MAXNAMELEN]);
488 +extern ip_set_id_t ip_set_get_byindex(ip_set_id_t id);
489 +extern void ip_set_put(ip_set_id_t id);
490 +
491 +/* API for iptables set match, and SET target */
492 +extern void ip_set_addip_kernel(ip_set_id_t id,
493 +                               const struct sk_buff *skb,
494 +                               const u_int32_t *flags);
495 +extern void ip_set_delip_kernel(ip_set_id_t id,
496 +                               const struct sk_buff *skb,
497 +                               const u_int32_t *flags);
498 +extern int ip_set_testip_kernel(ip_set_id_t id,
499 +                               const struct sk_buff *skb,
500 +                               const u_int32_t *flags);
501 +
502 +#endif                         /* __KERNEL__ */
503 +
504 +#endif /*_IP_SET_H*/
505 diff --git a/include/linux/netfilter_ipv4/ip_set_iphash.h b/include/linux/netfilter_ipv4/ip_set_iphash.h
506 new file mode 100644
507 index 0000000..7de854b
508 --- /dev/null
509 +++ b/include/linux/netfilter_ipv4/ip_set_iphash.h
510 @@ -0,0 +1,30 @@
511 +#ifndef __IP_SET_IPHASH_H
512 +#define __IP_SET_IPHASH_H
513 +
514 +#include <linux/netfilter_ipv4/ip_set.h>
515 +
516 +#define SETTYPE_NAME "iphash"
517 +#define MAX_RANGE 0x0000FFFF
518 +
519 +struct ip_set_iphash {         /* per-set data of the iphash type */
520 +       ip_set_ip_t *members;           /* the iphash proper */
521 +       uint32_t elements;              /* number of elements */
522 +       uint32_t hashsize;              /* hash size */
523 +       uint16_t probes;                /* max number of probes  */
524 +       uint16_t resize;                /* resize factor in percent */
525 +       ip_set_ip_t netmask;            /* netmask */
526 +       void *initval[0];               /* initvals for jhash_1word; zero-length array, so storage must be allocated past the struct */
527 +};
528 +
529 +struct ip_set_req_iphash_create {
530 +       uint32_t hashsize;
531 +       uint16_t probes;
532 +       uint16_t resize;
533 +       ip_set_ip_t netmask;
534 +};
535 +
536 +struct ip_set_req_iphash {
537 +       ip_set_ip_t ip;
538 +};
539 +
540 +#endif /* __IP_SET_IPHASH_H */
541 diff --git a/include/linux/netfilter_ipv4/ip_set_ipmap.h b/include/linux/netfilter_ipv4/ip_set_ipmap.h
542 new file mode 100644
543 index 0000000..2435102
544 --- /dev/null
545 +++ b/include/linux/netfilter_ipv4/ip_set_ipmap.h
546 @@ -0,0 +1,56 @@
547 +#ifndef __IP_SET_IPMAP_H
548 +#define __IP_SET_IPMAP_H
549 +
550 +#include <linux/netfilter_ipv4/ip_set.h>
551 +
552 +#define SETTYPE_NAME "ipmap"
553 +#define MAX_RANGE 0x0000FFFF
554 +
555 +struct ip_set_ipmap {
556 +       void *members;                  /* the ipmap proper */
557 +       ip_set_ip_t first_ip;           /* host byte order, included in range */
558 +       ip_set_ip_t last_ip;            /* host byte order, included in range */
559 +       ip_set_ip_t netmask;            /* subnet netmask */
560 +       ip_set_ip_t sizeid;             /* size of set in IPs */
561 +       ip_set_ip_t hosts;              /* number of hosts in a subnet */
562 +};
563 +
564 +struct ip_set_req_ipmap_create {
565 +       ip_set_ip_t from;
566 +       ip_set_ip_t to;
567 +       ip_set_ip_t netmask;
568 +};
569 +
570 +struct ip_set_req_ipmap {
571 +       ip_set_ip_t ip;
572 +};
573 +/* Prefix length (0..32) of netmask "mask"; 0 for mask 0 and for any non-contiguous mask. */
574 +static inline unsigned int     /* static inline: the definition lives in a header */
575 +mask_to_bits(ip_set_ip_t mask)
576 +{
577 +       unsigned int bits = 32;
578 +       ip_set_ip_t maskaddr;
579 +
580 +       if (mask == 0xFFFFFFFF)
581 +               return bits;
582 +       /* Try /31, /30, ... /1 in turn, clearing one more low bit of the candidate each round. */
583 +       maskaddr = 0xFFFFFFFE;
584 +       while (--bits > 0 && maskaddr != mask)  /* bits is unsigned: ">= 0" was always true and never ended for a bad mask */
585 +               maskaddr <<= 1;
586 +
587 +       return bits;
588 +}
589 +/* Longest netmask of /31 or shorter with (to & mask) == from; *bits receives its prefix length. */
590 +static inline ip_set_ip_t      /* static inline: the definition lives in a header */
591 +range_to_mask(ip_set_ip_t from, ip_set_ip_t to, unsigned int *bits)
592 +{
593 +       ip_set_ip_t mask = 0xFFFFFFFE;
594 +
595 +       *bits = 32;
596 +       while (--(*bits) > 0 && mask && (to & mask) != from)    /* *bits is unsigned: ">= 0" was always true */
597 +               mask <<= 1;
598 +       /* If no /1../31 mask matched, the loop leaves mask == 0 and *bits == 0. */
599 +       return mask;
600 +}
601 +       
602 +#endif /* __IP_SET_IPMAP_H */
603 diff --git a/include/linux/netfilter_ipv4/ip_set_ipporthash.h b/include/linux/netfilter_ipv4/ip_set_ipporthash.h
604 new file mode 100644
605 index 0000000..b715c56
606 --- /dev/null
607 +++ b/include/linux/netfilter_ipv4/ip_set_ipporthash.h
608 @@ -0,0 +1,34 @@
609 +#ifndef __IP_SET_IPPORTHASH_H
610 +#define __IP_SET_IPPORTHASH_H
611 +
612 +#include <linux/netfilter_ipv4/ip_set.h>
613 +
614 +#define SETTYPE_NAME "ipporthash"
615 +#define MAX_RANGE 0x0000FFFF
616 +#define INVALID_PORT   (MAX_RANGE + 1)
617 +
618 +struct ip_set_ipporthash {
619 +       ip_set_ip_t *members;           /* the ipporthash proper */
620 +       uint32_t elements;              /* number of elements */
621 +       uint32_t hashsize;              /* hash size */
622 +       uint16_t probes;                /* max number of probes  */
623 +       uint16_t resize;                /* resize factor in percent */
624 +       ip_set_ip_t first_ip;           /* host byte order, included in range */
625 +       ip_set_ip_t last_ip;            /* host byte order, included in range */
626 +       void *initval[0];               /* initvals for jhash_1word */
627 +};
628 +
629 +struct ip_set_req_ipporthash_create {
630 +       uint32_t hashsize;
631 +       uint16_t probes;
632 +       uint16_t resize;
633 +       ip_set_ip_t from;
634 +       ip_set_ip_t to;
635 +};
636 +
637 +struct ip_set_req_ipporthash {
638 +       ip_set_ip_t ip;
639 +       ip_set_ip_t port;
640 +};
641 +
642 +#endif /* __IP_SET_IPPORTHASH_H */
643 diff --git a/include/linux/netfilter_ipv4/ip_set_iptree.h b/include/linux/netfilter_ipv4/ip_set_iptree.h
644 new file mode 100644
645 index 0000000..64e716b
646 --- /dev/null
647 +++ b/include/linux/netfilter_ipv4/ip_set_iptree.h
648 @@ -0,0 +1,40 @@
649 +#ifndef __IP_SET_IPTREE_H
650 +#define __IP_SET_IPTREE_H
651 +
652 +#include <linux/netfilter_ipv4/ip_set.h>
653 +
654 +#define SETTYPE_NAME "iptree"
655 +#define MAX_RANGE 0x0000FFFF
656 +
657 +struct ip_set_iptreed {
658 +       unsigned long expires[256];             /* x.x.x.ADDR */
659 +};
660 +
661 +struct ip_set_iptreec {
662 +       struct ip_set_iptreed *tree[256];       /* x.x.ADDR.* */
663 +};
664 +
665 +struct ip_set_iptreeb {
666 +       struct ip_set_iptreec *tree[256];       /* x.ADDR.*.* */
667 +};
668 +
669 +struct ip_set_iptree {         /* without __KERNEL__ only timeout and gc_interval are declared */
670 +       unsigned int timeout;
671 +       unsigned int gc_interval;
672 +#ifdef __KERNEL__
673 +       uint32_t elements;              /* number of elements */
674 +       struct timer_list gc;
675 +       struct ip_set_iptreeb *tree[256];       /* ADDR.*.*.* */
676 +#endif
677 +};
678 +
679 +struct ip_set_req_iptree_create {
680 +       unsigned int timeout;
681 +};
682 +
683 +struct ip_set_req_iptree {
684 +       ip_set_ip_t ip;
685 +       unsigned int timeout;
686 +};
687 +
688 +#endif /* __IP_SET_IPTREE_H */
689 diff --git a/include/linux/netfilter_ipv4/ip_set_iptreemap.h b/include/linux/netfilter_ipv4/ip_set_iptreemap.h
690 new file mode 100644
691 index 0000000..bef576a
692 --- /dev/null
693 +++ b/include/linux/netfilter_ipv4/ip_set_iptreemap.h
694 @@ -0,0 +1,40 @@
695 +#ifndef __IP_SET_IPTREEMAP_H
696 +#define __IP_SET_IPTREEMAP_H
697 +
698 +#include <linux/netfilter_ipv4/ip_set.h>
699 +
700 +#define SETTYPE_NAME "iptreemap"
701 +
702 +#ifdef __KERNEL__
703 +struct ip_set_iptreemap_d {
704 +       unsigned char bitmap[32]; /* x.x.x.y */
705 +};
706 +
707 +struct ip_set_iptreemap_c {
708 +       struct ip_set_iptreemap_d *tree[256]; /* x.x.y.x */
709 +};
710 +
711 +struct ip_set_iptreemap_b {
712 +       struct ip_set_iptreemap_c *tree[256]; /* x.y.x.x */
713 +       unsigned char dirty[32];
714 +};
715 +#endif
716 +
717 +struct ip_set_iptreemap {      /* without __KERNEL__ only gc_interval is declared */
718 +       unsigned int gc_interval;
719 +#ifdef __KERNEL__
720 +       struct timer_list gc;
721 +       struct ip_set_iptreemap_b *tree[256]; /* y.x.x.x */
722 +#endif
723 +};
724 +
725 +struct ip_set_req_iptreemap_create {
726 +       unsigned int gc_interval;
727 +};
728 +
729 +struct ip_set_req_iptreemap {
730 +       ip_set_ip_t start;
731 +       ip_set_ip_t end;
732 +};
733 +
734 +#endif /* __IP_SET_IPTREEMAP_H */
735 diff --git a/include/linux/netfilter_ipv4/ip_set_jhash.h b/include/linux/netfilter_ipv4/ip_set_jhash.h
736 new file mode 100644
737 index 0000000..25c6b97
738 --- /dev/null
739 +++ b/include/linux/netfilter_ipv4/ip_set_jhash.h
740 @@ -0,0 +1,148 @@
741 +#ifndef _LINUX_IPSET_JHASH_H
742 +#define _LINUX_IPSET_JHASH_H
743 +
744 +/* This is a copy of linux/jhash.h but the types u32/u8 are changed
745 + * to __u32/__u8 so that the header file can be included into
746 + * userspace code as well. Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
747 + */
748 +
749 +/* jhash.h: Jenkins hash support.
750 + *
751 + * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
752 + *
753 + * http://burtleburtle.net/bob/hash/
754 + *
755 + * These are the credits from Bob's sources:
756 + *
757 + * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
758 + * hash(), hash2(), hash3, and mix() are externally useful functions.
759 + * Routines to test the hash are included if SELF_TEST is defined.
760 + * You can use this free for any purpose.  It has no warranty.
761 + *
762 + * Copyright (C) 2003 David S. Miller (davem@redhat.com)
763 + *
764 + * I've modified Bob's hash to be useful in the Linux kernel, and
765 + * any bugs present are surely my fault.  -DaveM
766 + */
767 +
768 +/* NOTE: Arguments are modified. */
769 +#define __jhash_mix(a, b, c) /* mixes a, b, c in place; they must be plain modifiable lvalues (used unparenthesized, many times) */ \
770 +{ /* expands to a bare brace block, not a do/while(0) statement */ \
771 +  a -= b; a -= c; a ^= (c>>13); \
772 +  b -= c; b -= a; b ^= (a<<8); \
773 +  c -= a; c -= b; c ^= (b>>13); \
774 +  a -= b; a -= c; a ^= (c>>12);  \
775 +  b -= c; b -= a; b ^= (a<<16); \
776 +  c -= a; c -= b; c ^= (b>>5); \
777 +  a -= b; a -= c; a ^= (c>>3);  \
778 +  b -= c; b -= a; b ^= (a<<10); \
779 +  c -= a; c -= b; c ^= (b>>15); \
780 +}
781 +
782 +/* The golden ratio: an arbitrary value */
783 +#define JHASH_GOLDEN_RATIO     0x9e3779b9
784 +
785 +/* The most generic version, hashes an arbitrary sequence
786 + * of bytes.  No alignment or length assumptions are made about
787 + * the input key.
788 + */
789 +static inline __u32 jhash(void *key, __u32 length, __u32 initval)
790 +{      /* key: bytes to hash (only read here); length: number of bytes; initval: seed, the initial value of c */
791 +       __u32 a, b, c, len;
792 +       __u8 *k = key;
793 +
794 +       len = length;
795 +       a = b = JHASH_GOLDEN_RATIO;
796 +       c = initval;
797 +       /* Main loop: 12 bytes per round, each word built byte by byte with the lowest index as the low byte. */
798 +       while (len >= 12) {
799 +               a += (k[0] +((__u32)k[1]<<8) +((__u32)k[2]<<16) +((__u32)k[3]<<24));
800 +               b += (k[4] +((__u32)k[5]<<8) +((__u32)k[6]<<16) +((__u32)k[7]<<24));
801 +               c += (k[8] +((__u32)k[9]<<8) +((__u32)k[10]<<16)+((__u32)k[11]<<24));
802 +
803 +               __jhash_mix(a,b,c);
804 +
805 +               k += 12;
806 +               len -= 12;
807 +       }
808 +       /* Tail: add the total length to c, then the remaining 0..11 bytes. */
809 +       c += length;
810 +       switch (len) {          /* every case deliberately falls through to the cases below it */
811 +       case 11: c += ((__u32)k[10]<<24);
812 +       case 10: c += ((__u32)k[9]<<16);
813 +       case 9 : c += ((__u32)k[8]<<8);         /* tail bytes enter c from bit 8 upwards */
814 +       case 8 : b += ((__u32)k[7]<<24);
815 +       case 7 : b += ((__u32)k[6]<<16);
816 +       case 6 : b += ((__u32)k[5]<<8);
817 +       case 5 : b += k[4];
818 +       case 4 : a += ((__u32)k[3]<<24);
819 +       case 3 : a += ((__u32)k[2]<<16);
820 +       case 2 : a += ((__u32)k[1]<<8);
821 +       case 1 : a += k[0];
822 +       };                      /* len == 0 matches no case: nothing more is added */
823 +
824 +       __jhash_mix(a,b,c);
825 +
826 +       return c;
827 +}
828 +
829 +/* A special optimized version that handles 1 or more of __u32s.
830 + * The length parameter here is the number of __u32s in the key.
831 + */
832 +static inline __u32 jhash2(__u32 *k, __u32 length, __u32 initval)
833 +{      /* k: words to hash (only read here); length: number of __u32 words; initval: seed, the initial value of c */
834 +       __u32 a, b, c, len;
835 +
836 +       a = b = JHASH_GOLDEN_RATIO;
837 +       c = initval;
838 +       len = length;
839 +       /* Main loop: three words per mixing round. */
840 +       while (len >= 3) {
841 +               a += k[0];
842 +               b += k[1];
843 +               c += k[2];
844 +               __jhash_mix(a, b, c);
845 +               k += 3; len -= 3;
846 +       }
847 +       /* Tail: the length is added in bytes (words * 4), then the 0..2 leftover words. */
848 +       c += length * 4;
849 +
850 +       switch (len) {          /* case 2 deliberately falls through to case 1 */
851 +       case 2 : b += k[1];
852 +       case 1 : a += k[0];
853 +       };
854 +
855 +       __jhash_mix(a,b,c);
856 +
857 +       return c;
858 +}
859 +
860 +
861 +/* Special ultra-optimized versions that know they are hashing exactly
862 + * 3, 2 or 1 word(s).
863 + *
864 + * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
865 + *       done at the end is not done here.
866 + */
867 +static inline __u32 jhash_3words(__u32 a, __u32 b, __u32 c, __u32 initval)
868 +{      /* hashes exactly three words with a single mixing round; no length is added */
869 +       a += JHASH_GOLDEN_RATIO;
870 +       b += JHASH_GOLDEN_RATIO;
871 +       c += initval;
872 +       /* __jhash_mix() rewrites a, b and c in place; c is returned as the hash */
873 +       __jhash_mix(a, b, c);
874 +
875 +       return c;
876 +}
877 +
878 +static inline __u32 jhash_2words(__u32 a, __u32 b, __u32 initval)
879 +{      /* two-word hash: jhash_3words() with the third word fixed to 0 */
880 +       return jhash_3words(a, b, 0, initval);
881 +}
882 +
883 +static inline __u32 jhash_1word(__u32 a, __u32 initval)
884 +{      /* one-word hash: jhash_3words() with the second and third words fixed to 0 */
885 +       return jhash_3words(a, 0, 0, initval);
886 +}
887 +
888 +#endif /* _LINUX_IPSET_JHASH_H */
889 diff --git a/include/linux/netfilter_ipv4/ip_set_macipmap.h b/include/linux/netfilter_ipv4/ip_set_macipmap.h
890 new file mode 100644
891 index 0000000..ee34c9b
892 --- /dev/null
893 +++ b/include/linux/netfilter_ipv4/ip_set_macipmap.h
894 @@ -0,0 +1,38 @@
895 +#ifndef __IP_SET_MACIPMAP_H
896 +#define __IP_SET_MACIPMAP_H
897 +
898 +#include <linux/netfilter_ipv4/ip_set.h>
899 +
900 +#define SETTYPE_NAME "macipmap"
901 +#define MAX_RANGE 0x0000FFFF
902 +
903 +/* general flags */
904 +#define IPSET_MACIP_MATCHUNSET 1
905 +
906 +/* per ip flags */
907 +#define IPSET_MACIP_ISSET      1
908 +
909 +struct ip_set_macipmap {
910 +       void *members;                  /* the macipmap proper */
911 +       ip_set_ip_t first_ip;           /* host byte order, included in range */
912 +       ip_set_ip_t last_ip;            /* host byte order, included in range */
913 +       u_int32_t flags;
914 +};
915 +
916 +struct ip_set_req_macipmap_create {
917 +       ip_set_ip_t from;
918 +       ip_set_ip_t to;
919 +       u_int32_t flags;
920 +};
921 +
922 +struct ip_set_req_macipmap {
923 +       ip_set_ip_t ip;
924 +       unsigned char ethernet[ETH_ALEN];
925 +};
926 +
927 +struct ip_set_macip {
928 +       unsigned short flags;
929 +       unsigned char ethernet[ETH_ALEN];
930 +};
931 +
932 +#endif /* __IP_SET_MACIPMAP_H */
933 diff --git a/include/linux/netfilter_ipv4/ip_set_malloc.h b/include/linux/netfilter_ipv4/ip_set_malloc.h
934 new file mode 100644
935 index 0000000..ab97e14
936 --- /dev/null
937 +++ b/include/linux/netfilter_ipv4/ip_set_malloc.h
938 @@ -0,0 +1,116 @@
939 +#ifndef _IP_SET_MALLOC_H
940 +#define _IP_SET_MALLOC_H
941 +
942 +#ifdef __KERNEL__
943 +
944 +/* Memory allocation and deallocation */
945 +static size_t max_malloc_size = 0;
946 +
947 +static inline void init_max_malloc_size(void)
948 +{
949 +#define CACHE(x) max_malloc_size = x;
950 +#include <linux/kmalloc_sizes.h>
951 +#undef CACHE
952 +}
953 +
954 +static inline void * ip_set_malloc(size_t bytes)
955 +{
956 +       if (bytes > max_malloc_size)
957 +               return vmalloc(bytes);
958 +       else
959 +               return kmalloc(bytes, GFP_KERNEL);
960 +}
961 +
962 +static inline void ip_set_free(void * data, size_t bytes)
963 +{
964 +       if (bytes > max_malloc_size)
965 +               vfree(data);
966 +       else
967 +               kfree(data);
968 +}
969 +
970 +struct harray {
971 +       size_t max_elements;
972 +       void *arrays[0];
973 +};
974 +
975 +static inline void * 
976 +harray_malloc(size_t hashsize, size_t typesize, int flags)
977 +{
978 +       struct harray *harray;
979 +       size_t max_elements, size, i, j;
980 +       /* Returns a zeroed, chunked array of hashsize elements, or NULL */
981 +       if (!max_malloc_size)
982 +               init_max_malloc_size();
983 +       /* Zero sizes would divide by zero or wrap "size - 1" below */
984 +       if (!hashsize || !typesize || typesize > max_malloc_size)
985 +               return NULL;
986 +
987 +       max_elements = max_malloc_size/typesize;
988 +       size = hashsize/max_elements;
989 +       if (hashsize % max_elements)
990 +               size++;
991 +       
992 +       /* Last pointer signals end of arrays */
993 +       harray = kmalloc(sizeof(struct harray) + (size + 1) * sizeof(void *),
994 +                        flags);
995 +
996 +       if (!harray)
997 +               return NULL;
998 +       /* All chunks but the last hold max_elements; the last holds the rest */
999 +       for (i = 0; i < size - 1; i++) {
1000 +               harray->arrays[i] = kmalloc(max_elements * typesize, flags);
1001 +               if (!harray->arrays[i])
1002 +                       goto undo;
1003 +               memset(harray->arrays[i], 0, max_elements * typesize);
1004 +       }
1005 +       harray->arrays[i] = kmalloc((hashsize - i * max_elements) * typesize, 
1006 +                                   flags);
1007 +       if (!harray->arrays[i])
1008 +               goto undo;
1009 +       memset(harray->arrays[i], 0, (hashsize - i * max_elements) * typesize);
1010 +
1011 +       harray->max_elements = max_elements;
1012 +       harray->arrays[size] = NULL;
1013 +       
1014 +       return (void *)harray;
1015 +
1016 +    undo:
1017 +       for (j = 0; j < i; j++) {
1018 +               kfree(harray->arrays[j]);
1019 +       }
1020 +       kfree(harray);
1021 +       return NULL;
1022 +}
1023 +
1024 +static inline void harray_free(void *h)
1025 +{
1026 +       struct harray *harray = (struct harray *) h;
1027 +       size_t i;
1028 +       
1029 +       for (i = 0; harray->arrays[i] != NULL; i++)
1030 +               kfree(harray->arrays[i]);
1031 +       kfree(harray);
1032 +}
1033 +
1034 +static inline void harray_flush(void *h, size_t hashsize, size_t typesize)
1035 +{
1036 +       struct harray *harray = (struct harray *) h;
1037 +       size_t i;
1038 +       
1039 +       for (i = 0; harray->arrays[i+1] != NULL; i++)
1040 +               memset(harray->arrays[i], 0, harray->max_elements * typesize);
1041 +       memset(harray->arrays[i], 0, 
1042 +              (hashsize - i * harray->max_elements) * typesize);
1043 +}
1044 +
1045 +#define HARRAY_ELEM(h, type, which)                            \
1046 +({                                                             \
1047 +       struct harray *__h = (struct harray *)(h);              \
1048 +       ((type)((__h)->arrays[(which)/(__h)->max_elements])     \
1049 +               + (which)%(__h)->max_elements);                 \
1050 +})
1051 +
1052 +#endif                         /* __KERNEL__ */
1053 +
1054 +#endif /*_IP_SET_MALLOC_H*/
1055 diff --git a/include/linux/netfilter_ipv4/ip_set_nethash.h b/include/linux/netfilter_ipv4/ip_set_nethash.h
1056 new file mode 100644
1057 index 0000000..172ef02
1058 --- /dev/null
1059 +++ b/include/linux/netfilter_ipv4/ip_set_nethash.h
1060 @@ -0,0 +1,55 @@
1061 +#ifndef __IP_SET_NETHASH_H
1062 +#define __IP_SET_NETHASH_H
1063 +
1064 +#include <linux/netfilter_ipv4/ip_set.h>
1065 +
1066 +#define SETTYPE_NAME "nethash"
1067 +#define MAX_RANGE 0x0000FFFF
1068 +
1069 +struct ip_set_nethash {
1070 +       ip_set_ip_t *members;           /* the nethash proper */
1071 +       uint32_t elements;              /* number of elements */
1072 +       uint32_t hashsize;              /* hash size */
1073 +       uint16_t probes;                /* max number of probes  */
1074 +       uint16_t resize;                /* resize factor in percent */
1075 +       unsigned char cidr[30];         /* CIDR sizes */
1076 +       void *initval[0];               /* initvals for jhash_1word */
1077 +};
1078 +
1079 +struct ip_set_req_nethash_create {
1080 +       uint32_t hashsize;
1081 +       uint16_t probes;
1082 +       uint16_t resize;
1083 +};
1084 +
1085 +struct ip_set_req_nethash {
1086 +       ip_set_ip_t ip;
1087 +       unsigned char cidr;
1088 +};
1089 +
1090 +static unsigned char shifts[] = {255, 253, 249, 241, 225, 193, 129, 1};
1091 +
1092 +static inline ip_set_ip_t 
1093 +pack(ip_set_ip_t ip, unsigned char cidr)
1094 +{
1095 +       ip_set_ip_t addr, *paddr = &addr;
1096 +       unsigned char n, t, *a;
1097 +
1098 +       addr = htonl(ip & (0xFFFFFFFF << (32 - (cidr))));
1099 +#ifdef __KERNEL__
1100 +       DP("ip:%u.%u.%u.%u/%u", NIPQUAD(addr), cidr);
1101 +#endif
1102 +       n = cidr / 8;
1103 +       t = cidr % 8;   
1104 +       a = &((unsigned char *)paddr)[n];
1105 +       *a = *a /(1 << (8 - t)) + shifts[t];
1106 +#ifdef __KERNEL__
1107 +       DP("n: %u, t: %u, a: %u", n, t, *a);
1108 +       DP("ip:%u.%u.%u.%u/%u, %u.%u.%u.%u",
1109 +          HIPQUAD(ip), cidr, NIPQUAD(addr));
1110 +#endif
1111 +
1112 +       return ntohl(addr);
1113 +}
1114 +
1115 +#endif /* __IP_SET_NETHASH_H */
1116 diff --git a/include/linux/netfilter_ipv4/ip_set_portmap.h b/include/linux/netfilter_ipv4/ip_set_portmap.h
1117 new file mode 100644
1118 index 0000000..c17165c
1119 --- /dev/null
1120 +++ b/include/linux/netfilter_ipv4/ip_set_portmap.h
1121 @@ -0,0 +1,25 @@
1122 +#ifndef __IP_SET_PORTMAP_H
1123 +#define __IP_SET_PORTMAP_H
1124 +
1125 +#include <linux/netfilter_ipv4/ip_set.h>
1126 +
1127 +#define SETTYPE_NAME   "portmap"
1128 +#define MAX_RANGE      0x0000FFFF
1129 +#define INVALID_PORT   (MAX_RANGE + 1)
1130 +
1131 +struct ip_set_portmap {
1132 +       void *members;                  /* the portmap proper */
1133 +       ip_set_ip_t first_port;         /* host byte order, included in range */
1134 +       ip_set_ip_t last_port;          /* host byte order, included in range */
1135 +};
1136 +
1137 +struct ip_set_req_portmap_create {
1138 +       ip_set_ip_t from;
1139 +       ip_set_ip_t to;
1140 +};
1141 +
1142 +struct ip_set_req_portmap {
1143 +       ip_set_ip_t port;
1144 +};
1145 +
1146 +#endif /* __IP_SET_PORTMAP_H */
1147 diff --git a/include/linux/netfilter_ipv4/ipt_set.h b/include/linux/netfilter_ipv4/ipt_set.h
1148 new file mode 100644
1149 index 0000000..2a18b93
1150 --- /dev/null
1151 +++ b/include/linux/netfilter_ipv4/ipt_set.h
1152 @@ -0,0 +1,21 @@
1153 +#ifndef _IPT_SET_H
1154 +#define _IPT_SET_H
1155 +
1156 +#include <linux/netfilter_ipv4/ip_set.h>
1157 +
1158 +struct ipt_set_info {
1159 +       ip_set_id_t index;
1160 +       u_int32_t flags[IP_SET_MAX_BINDINGS + 1];
1161 +};
1162 +
1163 +/* match info */
1164 +struct ipt_set_info_match {
1165 +       struct ipt_set_info match_set;
1166 +};
1167 +
1168 +struct ipt_set_info_target {
1169 +       struct ipt_set_info add_set;
1170 +       struct ipt_set_info del_set;
1171 +};
1172 +
1173 +#endif /*_IPT_SET_H*/
1174 diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
1175 index 1833bdb..4c315cf 100644
1176 --- a/net/ipv4/netfilter/Kconfig
1177 +++ b/net/ipv4/netfilter/Kconfig
1178 @@ -388,5 +388,122 @@ config IP_NF_ARP_MANGLE
1179  
1180  endif # IP_NF_ARPTABLES
1181  
1182 +config IP_NF_SET
1183 +       tristate "IP set support"
1184 +       depends on INET && NETFILTER
1185 +       help
1186 +         This option adds IP set support to the kernel.
1187 +         In order to define and use sets, you need the userspace utility
1188 +         ipset(8).
1189 +
1190 +         To compile it as a module, choose M here.  If unsure, say N.
1191 +
1192 +config IP_NF_SET_MAX
1193 +       int "Maximum number of IP sets"
1194 +       default 256
1195 +       range 2 65534
1196 +       depends on IP_NF_SET
1197 +       help
1198 +         You can define here the default value of the maximum number
1199 +         of IP sets for the kernel.
1200 +
1201 +         The value can be overridden by the 'max_sets' module
1202 +         parameter of the 'ip_set' module.
1203 +
1204 +config IP_NF_SET_HASHSIZE
1205 +       int "Hash size for bindings of IP sets"
1206 +       default 1024
1207 +       depends on IP_NF_SET
1208 +       help
1209 +         You can define here the default value of the hash size for
1210 +         bindings of IP sets.
1211 +
1212 +         The value can be overridden by the 'hash_size' module
1213 +         parameter of the 'ip_set' module.
1214 +
1215 +config IP_NF_SET_IPMAP
1216 +       tristate "ipmap set support"
1217 +       depends on IP_NF_SET
1218 +       help
1219 +         This option adds the ipmap set type support.
1220 +
1221 +         To compile it as a module, choose M here.  If unsure, say N.
1222 +
1223 +config IP_NF_SET_MACIPMAP
1224 +       tristate "macipmap set support"
1225 +       depends on IP_NF_SET
1226 +       help
1227 +         This option adds the macipmap set type support.
1228 +
1229 +         To compile it as a module, choose M here.  If unsure, say N.
1230 +
1231 +config IP_NF_SET_PORTMAP
1232 +       tristate "portmap set support"
1233 +       depends on IP_NF_SET
1234 +       help
1235 +         This option adds the portmap set type support.
1236 +
1237 +         To compile it as a module, choose M here.  If unsure, say N.
1238 +
1239 +config IP_NF_SET_IPHASH
1240 +       tristate "iphash set support"
1241 +       depends on IP_NF_SET
1242 +       help
1243 +         This option adds the iphash set type support.
1244 +
1245 +         To compile it as a module, choose M here.  If unsure, say N.
1246 +
1247 +config IP_NF_SET_NETHASH
1248 +       tristate "nethash set support"
1249 +       depends on IP_NF_SET
1250 +       help
1251 +         This option adds the nethash set type support.
1252 +
1253 +         To compile it as a module, choose M here.  If unsure, say N.
1254 +
1255 +config IP_NF_SET_IPPORTHASH
1256 +       tristate "ipporthash set support"
1257 +       depends on IP_NF_SET
1258 +       help
1259 +         This option adds the ipporthash set type support.
1260 +
1261 +         To compile it as a module, choose M here.  If unsure, say N.
1262 +
1263 +config IP_NF_SET_IPTREE
1264 +       tristate "iptree set support"
1265 +       depends on IP_NF_SET
1266 +       help
1267 +         This option adds the iptree set type support.
1268 +
1269 +         To compile it as a module, choose M here.  If unsure, say N.
1270 +
1271 +config IP_NF_SET_IPTREEMAP
1272 +       tristate "iptreemap set support"
1273 +       depends on IP_NF_SET
1274 +       help
1275 +         This option adds the iptreemap set type support.
1276 +
1277 +         To compile it as a module, choose M here.  If unsure, say N.
1278 +
1279 +config IP_NF_MATCH_SET
1280 +       tristate "set match support"
1281 +       depends on IP_NF_SET
1282 +       help
1283 +         Set matching matches against given IP sets.
1284 +         You need the ipset utility to create and set up the sets.
1285 +
1286 +         To compile it as a module, choose M here.  If unsure, say N.
1287 +
1288 +config IP_NF_TARGET_SET
1289 +       tristate "SET target support"
1290 +       depends on IP_NF_SET
1291 +       help
1292 +         The SET target makes it possible to add/delete entries
1293 +         in IP sets.
1294 +         You need the ipset utility to create and set up the sets.
1295 +
1296 +         To compile it as a module, choose M here.  If unsure, say N.
1297 +
1298 +
1299  endmenu
1300  
1301 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
1302 index 4811159..fd4913a 100644
1303 --- a/net/ipv4/netfilter/Makefile
1304 +++ b/net/ipv4/netfilter/Makefile
1305 @@ -51,6 +51,7 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
1306  obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
1307  obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
1308  obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
1309 +obj-$(CONFIG_IP_NF_MATCH_SET) += ipt_set.o
1310  
1311  # targets
1312  obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
1313 @@ -60,8 +61,20 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
1314  obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
1315  obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
1316  obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
1317 +obj-$(CONFIG_IP_NF_TARGET_SET) += ipt_SET.o
1318  obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
1319  
1320 +# sets
1321 +obj-$(CONFIG_IP_NF_SET) += ip_set.o
1322 +obj-$(CONFIG_IP_NF_SET_IPMAP) += ip_set_ipmap.o
1323 +obj-$(CONFIG_IP_NF_SET_PORTMAP) += ip_set_portmap.o
1324 +obj-$(CONFIG_IP_NF_SET_MACIPMAP) += ip_set_macipmap.o
1325 +obj-$(CONFIG_IP_NF_SET_IPHASH) += ip_set_iphash.o
1326 +obj-$(CONFIG_IP_NF_SET_NETHASH) += ip_set_nethash.o
1327 +obj-$(CONFIG_IP_NF_SET_IPPORTHASH) += ip_set_ipporthash.o
1328 +obj-$(CONFIG_IP_NF_SET_IPTREE) += ip_set_iptree.o
1329 +obj-$(CONFIG_IP_NF_SET_IPTREEMAP) += ip_set_iptreemap.o
1330 +
1331  # generic ARP tables
1332  obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
1333  obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
1334 diff --git a/net/ipv4/netfilter/ip_set.c b/net/ipv4/netfilter/ip_set.c
1335 new file mode 100644
1336 index 0000000..7d00a14
1337 --- /dev/null
1338 +++ b/net/ipv4/netfilter/ip_set.c
1339 @@ -0,0 +1,2005 @@
1340 +/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
1341 + *                         Patrick Schaaf <bof@bof.de>
1342 + * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
1343 + *
1344 + * This program is free software; you can redistribute it and/or modify
1345 + * it under the terms of the GNU General Public License version 2 as
1346 + * published by the Free Software Foundation.  
1347 + */
1348 +
1349 +/* Kernel module for IP set management */
1350 +
1351 +#include <linux/version.h>
1352 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
1353 +#include <linux/config.h>
1354 +#endif
1355 +#include <linux/module.h>
1356 +#include <linux/moduleparam.h>
1357 +#include <linux/kmod.h>
1358 +#include <linux/ip.h>
1359 +#include <linux/skbuff.h>
1360 +#include <linux/random.h>
1361 +#include <linux/jhash.h>
1362 +#include <linux/netfilter_ipv4/ip_tables.h>
1363 +#include <linux/errno.h>
1364 +#include <asm/uaccess.h>
1365 +#include <asm/bitops.h>
1366 +// #include <asm/semaphore.h>
1367 +#include <linux/spinlock.h>
1368 +#include <linux/vmalloc.h>
1369 +
1370 +#define ASSERT_READ_LOCK(x)
1371 +#define ASSERT_WRITE_LOCK(x)
1372 +#include <linux/netfilter_ipv4/ip_set.h>
1373 +
1374 +static struct list_head set_type_list;         /* all registered sets */
1375 +static struct ip_set **ip_set_list;            /* all individual sets */
1376 +static DEFINE_RWLOCK(ip_set_lock);             /* protects the lists and the hash */
1377 +static DECLARE_MUTEX(ip_set_app_mutex);                /* serializes user access */
1378 +static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX;
1379 +static ip_set_id_t ip_set_bindings_hash_size =  CONFIG_IP_NF_SET_HASHSIZE;
1380 +static struct list_head *ip_set_hash;          /* hash of bindings */
1381 +static unsigned int ip_set_hash_random;                /* random seed */
1382 +
1383 +/*
1384 + * Sets are identified either by the index in ip_set_list or by id.
1385 + * The id never changes and is used to find a key in the hash. 
1386 + * The index may change by swapping and used at all other places 
1387 + * (set/SET netfilter modules, binding value, etc.)
1388 + *
1389 + * Userspace requests are serialized by ip_set_app_mutex and sets can
1390 + * be deleted only from userspace. Therefore ip_set_list locking 
1391 + * must obey the following rules:
1392 + *
1393 + * - kernel requests: read and write locking mandatory
1394 + * - user requests: read locking optional, write locking mandatory
1395 + */
1396 +
1397 +static inline void
1398 +__ip_set_get(ip_set_id_t index)
1399 +{
1400 +       atomic_inc(&ip_set_list[index]->ref);
1401 +}
1402 +
1403 +static inline void
1404 +__ip_set_put(ip_set_id_t index)
1405 +{
1406 +       atomic_dec(&ip_set_list[index]->ref);
1407 +}
1408 +
1409 +/*
1410 + * Binding routines
1411 + */
1412 +
1413 +static inline struct ip_set_hash *
1414 +__ip_set_find(u_int32_t key, ip_set_id_t id, ip_set_ip_t ip)
1415 +{
1416 +       struct ip_set_hash *set_hash;
1417 +
1418 +       list_for_each_entry(set_hash, &ip_set_hash[key], list)
1419 +               if (set_hash->id == id && set_hash->ip == ip)
1420 +                       return set_hash;
1421 +                       
1422 +       return NULL;
1423 +}
1424 +
1425 +static ip_set_id_t
1426 +ip_set_find_in_hash(ip_set_id_t id, ip_set_ip_t ip)
1427 +{
1428 +       u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) 
1429 +                               % ip_set_bindings_hash_size;
1430 +       struct ip_set_hash *set_hash;
1431 +
1432 +       ASSERT_READ_LOCK(&ip_set_lock);
1433 +       IP_SET_ASSERT(ip_set_list[id]);
1434 +       DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));     
1435 +       
1436 +       set_hash = __ip_set_find(key, id, ip);
1437 +       
1438 +       DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, 
1439 +          HIPQUAD(ip),
1440 +          set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
1441 +
1442 +       return (set_hash != NULL ? set_hash->binding : IP_SET_INVALID_ID);
1443 +}
1444 +
1445 +static inline void 
1446 +__set_hash_del(struct ip_set_hash *set_hash)
1447 +{
1448 +       ASSERT_WRITE_LOCK(&ip_set_lock);
1449 +       IP_SET_ASSERT(ip_set_list[set_hash->binding]);  
1450 +
1451 +       __ip_set_put(set_hash->binding);
1452 +       list_del(&set_hash->list);
1453 +       kfree(set_hash);
1454 +}
1455 +
1456 +static int
1457 +ip_set_hash_del(ip_set_id_t id, ip_set_ip_t ip)
1458 +{
1459 +       u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
1460 +                               % ip_set_bindings_hash_size;
1461 +       struct ip_set_hash *set_hash;
1462 +       
1463 +       IP_SET_ASSERT(ip_set_list[id]);
1464 +       DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));     
1465 +       write_lock_bh(&ip_set_lock);
1466 +       set_hash = __ip_set_find(key, id, ip);
1467 +       DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name,
1468 +          HIPQUAD(ip),
1469 +          set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
1470 +
1471 +       if (set_hash != NULL)
1472 +               __set_hash_del(set_hash);
1473 +       write_unlock_bh(&ip_set_lock);
1474 +       return 0;
1475 +}
1476 +
1477 +static int 
1478 +ip_set_hash_add(ip_set_id_t id, ip_set_ip_t ip, ip_set_id_t binding)
1479 +{
1480 +       u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
1481 +                               % ip_set_bindings_hash_size;
1482 +       struct ip_set_hash *set_hash;
1483 +       int ret = 0;
1484 +       
1485 +       IP_SET_ASSERT(ip_set_list[id]);
1486 +       IP_SET_ASSERT(ip_set_list[binding]);
1487 +       DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, 
1488 +          HIPQUAD(ip), ip_set_list[binding]->name);
1489 +       write_lock_bh(&ip_set_lock);
1490 +       set_hash = __ip_set_find(key, id, ip);
1491 +       if (!set_hash) {
1492 +               set_hash = kmalloc(sizeof(struct ip_set_hash), GFP_ATOMIC);
1493 +               if (!set_hash) {
1494 +                       ret = -ENOMEM;
1495 +                       goto unlock;
1496 +               }
1497 +               INIT_LIST_HEAD(&set_hash->list);
1498 +               set_hash->id = id;
1499 +               set_hash->ip = ip;
1500 +               list_add(&set_hash->list, &ip_set_hash[key]);
1501 +       } else {
1502 +               IP_SET_ASSERT(ip_set_list[set_hash->binding]);  
1503 +               DP("overwrite binding: %s",
1504 +                  ip_set_list[set_hash->binding]->name);
1505 +               __ip_set_put(set_hash->binding);
1506 +       }
1507 +       set_hash->binding = binding;
1508 +       __ip_set_get(set_hash->binding);
1509 +       DP("stored: key %u, id %u (%s), ip %u.%u.%u.%u, binding %u (%s)",
1510 +          key, id, ip_set_list[id]->name,
1511 +          HIPQUAD(ip), binding, ip_set_list[binding]->name);
1512 +    unlock:
1513 +       write_unlock_bh(&ip_set_lock);
1514 +       return ret;
1515 +}
1516 +
1517 +#define FOREACH_HASH_DO(fn, args...)                                           \
1518 +({                                                                             \
1519 +       ip_set_id_t __key;                                                      \
1520 +       struct ip_set_hash *__set_hash;                                         \
1521 +                                                                               \
1522 +       for (__key = 0; __key < ip_set_bindings_hash_size; __key++) {           \
1523 +               list_for_each_entry(__set_hash, &ip_set_hash[__key], list)      \
1524 +                       fn(__set_hash , ## args);                               \
1525 +       }                                                                       \
1526 +})
1527 +
1528 +#define FOREACH_HASH_RW_DO(fn, args...)                                                \
1529 +({                                                                             \
1530 +       ip_set_id_t __key;                                                      \
1531 +       struct ip_set_hash *__set_hash, *__n;                                   \
1532 +                                                                               \
1533 +       ASSERT_WRITE_LOCK(&ip_set_lock);                                        \
1534 +       for (__key = 0; __key < ip_set_bindings_hash_size; __key++) {           \
1535 +               list_for_each_entry_safe(__set_hash, __n, &ip_set_hash[__key], list)\
1536 +                       fn(__set_hash , ## args);                               \
1537 +       }                                                                       \
1538 +})
1539 +
1540 +/* Add, del and test set entries from kernel */
1541 +
1542 +#define follow_bindings(index, set, ip)                                        \
1543 +((index = ip_set_find_in_hash((set)->id, ip)) != IP_SET_INVALID_ID     \
1544 + || (index = (set)->binding) != IP_SET_INVALID_ID)
1545 +
1546 +int
1547 +ip_set_testip_kernel(ip_set_id_t index,
1548 +                    const struct sk_buff *skb,
1549 +                    const u_int32_t *flags)
1550 +{
1551 +       struct ip_set *set;
1552 +       ip_set_ip_t ip;
1553 +       int res;
1554 +       unsigned char i = 0;
1555 +       
1556 +       IP_SET_ASSERT(flags[i]);
1557 +       read_lock_bh(&ip_set_lock);
1558 +       do {
1559 +               set = ip_set_list[index];
1560 +               IP_SET_ASSERT(set);
1561 +               DP("set %s, index %u", set->name, index);
1562 +               read_lock_bh(&set->lock);
1563 +               res = set->type->testip_kernel(set, skb, &ip, flags, i++);
1564 +               read_unlock_bh(&set->lock);
1565 +               i += !!(set->type->features & IPSET_DATA_DOUBLE);
1566 +       } while (res > 0 
1567 +                && flags[i] 
1568 +                && follow_bindings(index, set, ip));
1569 +       read_unlock_bh(&ip_set_lock);
1570 +
1571 +       return res;
1572 +}
1573 +
1574 +void
1575 +ip_set_addip_kernel(ip_set_id_t index,
1576 +                   const struct sk_buff *skb,
1577 +                   const u_int32_t *flags)
1578 +{
1579 +       struct ip_set *set;
1580 +       ip_set_ip_t ip;
1581 +       int res;
1582 +       unsigned char i = 0;
1583 +
1584 +       IP_SET_ASSERT(flags[i]);
1585 +   retry:
1586 +       read_lock_bh(&ip_set_lock);
1587 +       do {
1588 +               set = ip_set_list[index];
1589 +               IP_SET_ASSERT(set);
1590 +               DP("set %s, index %u", set->name, index);
1591 +               write_lock_bh(&set->lock);
1592 +               res = set->type->addip_kernel(set, skb, &ip, flags, i++);
1593 +               write_unlock_bh(&set->lock);
1594 +               i += !!(set->type->features & IPSET_DATA_DOUBLE);
1595 +       } while ((res == 0 || res == -EEXIST)
1596 +                && flags[i] 
1597 +                && follow_bindings(index, set, ip));
1598 +       read_unlock_bh(&ip_set_lock);
1599 +
1600 +       if (res == -EAGAIN
1601 +           && set->type->retry
1602 +           && (res = set->type->retry(set)) == 0)
1603 +               goto retry;
1604 +}
1605 +
1606 +void
1607 +ip_set_delip_kernel(ip_set_id_t index,
1608 +                   const struct sk_buff *skb,
1609 +                   const u_int32_t *flags)
1610 +{
1611 +       struct ip_set *set;
1612 +       ip_set_ip_t ip;
1613 +       int res;
1614 +       unsigned char i = 0;
1615 +
1616 +       IP_SET_ASSERT(flags[i]);
1617 +       read_lock_bh(&ip_set_lock);
1618 +       do {
1619 +               set = ip_set_list[index];
1620 +               IP_SET_ASSERT(set);
1621 +               DP("set %s, index %u", set->name, index);
1622 +               write_lock_bh(&set->lock);
1623 +               res = set->type->delip_kernel(set, skb, &ip, flags, i++);
1624 +               write_unlock_bh(&set->lock);
1625 +               i += !!(set->type->features & IPSET_DATA_DOUBLE);
1626 +       } while ((res == 0 || res == -EEXIST)
1627 +                && flags[i] 
1628 +                && follow_bindings(index, set, ip));
1629 +       read_unlock_bh(&ip_set_lock);
1630 +}
1631 +
1632 +/* Register and deregister settype */
1633 +
1634 +static inline struct ip_set_type *
1635 +find_set_type(const char *name)
1636 +{
1637 +       struct ip_set_type *set_type;
1638 +
1639 +       list_for_each_entry(set_type, &set_type_list, list)
1640 +               if (!strncmp(set_type->typename, name, IP_SET_MAXNAMELEN - 1))
1641 +                       return set_type;
1642 +       return NULL;
1643 +}
1644 +
1645 +int 
1646 +ip_set_register_set_type(struct ip_set_type *set_type)
1647 +{
1648 +       int ret = 0;
1649 +       
1650 +       if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) {
1651 +               ip_set_printk("'%s' uses wrong protocol version %u (want %u)",
1652 +                             set_type->typename,
1653 +                             set_type->protocol_version,
1654 +                             IP_SET_PROTOCOL_VERSION);
1655 +               return -EINVAL;
1656 +       }
1657 +
1658 +       write_lock_bh(&ip_set_lock);
1659 +       if (find_set_type(set_type->typename)) {
1660 +               /* Duplicate! */
1661 +               ip_set_printk("'%s' already registered!", 
1662 +                             set_type->typename);
1663 +               ret = -EINVAL;
1664 +               goto unlock;
1665 +       }
1666 +       if (!try_module_get(THIS_MODULE)) {
1667 +               ret = -EFAULT;
1668 +               goto unlock;
1669 +       }
1670 +       list_add(&set_type->list, &set_type_list);
1671 +       DP("'%s' registered.", set_type->typename);
1672 +   unlock:
1673 +       write_unlock_bh(&ip_set_lock);
1674 +       return ret;
1675 +}
1676 +
1677 +void
1678 +ip_set_unregister_set_type(struct ip_set_type *set_type)
1679 +{
1680 +       write_lock_bh(&ip_set_lock);
1681 +       if (!find_set_type(set_type->typename)) {
1682 +               ip_set_printk("'%s' not registered?",
1683 +                             set_type->typename);
1684 +               goto unlock;
1685 +       }
1686 +       list_del(&set_type->list);
1687 +       module_put(THIS_MODULE);
1688 +       DP("'%s' unregistered.", set_type->typename);
1689 +   unlock:
1690 +       write_unlock_bh(&ip_set_lock);
1691 +
1692 +}
1693 +
1694 +/*
1695 + * Userspace routines
1696 + */
1697 +
1698 +/*
1699 + * Find set by name, reference it once. The reference makes sure the
1700 + * thing pointed to, does not go away under our feet. Drop the reference
1701 + * later, using ip_set_put().
1702 + */
1703 +ip_set_id_t
1704 +ip_set_get_byname(const char *name)
1705 +{
1706 +       ip_set_id_t i, index = IP_SET_INVALID_ID;
1707 +       
1708 +       down(&ip_set_app_mutex);
1709 +       for (i = 0; i < ip_set_max; i++) {
1710 +               if (ip_set_list[i] != NULL
1711 +                   && strcmp(ip_set_list[i]->name, name) == 0) {
1712 +                       __ip_set_get(i);
1713 +                       index = i;
1714 +                       break;
1715 +               }
1716 +       }
1717 +       up(&ip_set_app_mutex);
1718 +       return index;
1719 +}
1720 +
1721 +/*
1722 + * Find set by index, reference it once. The reference makes sure the
1723 + * thing pointed to, does not go away under our feet. Drop the reference
1724 + * later, using ip_set_put(). Returns IP_SET_INVALID_ID if there is no such set.
1725 + */
1726 +ip_set_id_t
1727 +ip_set_get_byindex(ip_set_id_t index)
1728 +{
1729 +       /* Range check before locking: never return with the mutex held */
1730 +       if (index >= ip_set_max)
1731 +               return IP_SET_INVALID_ID;
1732 +
1733 +       down(&ip_set_app_mutex);
1734 +       if (ip_set_list[index])
1735 +               __ip_set_get(index);
1736 +       else
1737 +               index = IP_SET_INVALID_ID;
1738 +
1739 +       up(&ip_set_app_mutex);
1740 +       return index;
1741 +}
1742 +
1743 +/*
1744 + * If the given index is in range and refers to an existing set, decrement
1745 + * its reference count by 1. The caller shall not assume the index
1746 + * to be valid, after calling this function.
1747 + */
1748 +void ip_set_put(ip_set_id_t index)
1749 +{
1750 +       down(&ip_set_app_mutex);
1751 +       if (index < ip_set_max && ip_set_list[index])
1752 +               __ip_set_put(index);
1753 +       up(&ip_set_app_mutex);
1754 +}
1755 +
1756 +/* Find a set by name or index */
1757 +static ip_set_id_t
1758 +ip_set_find_byname(const char *name)
1759 +{
1760 +       ip_set_id_t i, index = IP_SET_INVALID_ID;
1761 +       
1762 +       for (i = 0; i < ip_set_max; i++) {
1763 +               if (ip_set_list[i] != NULL
1764 +                   && strcmp(ip_set_list[i]->name, name) == 0) {
1765 +                       index = i;
1766 +                       break;
1767 +               }
1768 +       }
1769 +       return index;
1770 +}
1771 +
1772 +static ip_set_id_t
1773 +ip_set_find_byindex(ip_set_id_t index)
1774 +{
1775 +       if (index >= ip_set_max || ip_set_list[index] == NULL)
1776 +               index = IP_SET_INVALID_ID;
1777 +       
1778 +       return index;
1779 +}
1780 +
1781 +/*
1782 + * Add, del, test, bind and unbind
1783 + */
1784 +
1785 +static inline int
1786 +__ip_set_testip(struct ip_set *set,
1787 +               const void *data,
1788 +               size_t size,
1789 +               ip_set_ip_t *ip)
1790 +{
1791 +       int res;
1792 +
1793 +       read_lock_bh(&set->lock);
1794 +       res = set->type->testip(set, data, size, ip);
1795 +       read_unlock_bh(&set->lock);
1796 +
1797 +       return res;
1798 +}
1799 +
1800 +static int
1801 +__ip_set_addip(ip_set_id_t index,
1802 +              const void *data,
1803 +              size_t size)
1804 +{
1805 +       struct ip_set *set = ip_set_list[index];
1806 +       ip_set_ip_t ip;
1807 +       int res;
1808 +       
1809 +       IP_SET_ASSERT(set);
1810 +       do {
1811 +               write_lock_bh(&set->lock);
1812 +               res = set->type->addip(set, data, size, &ip);
1813 +               write_unlock_bh(&set->lock);
1814 +       } while (res == -EAGAIN
1815 +                && set->type->retry
1816 +                && (res = set->type->retry(set)) == 0);
1817 +
1818 +       return res;
1819 +}
1820 +
1821 +static int
1822 +ip_set_addip(ip_set_id_t index,
1823 +            const void *data,
1824 +            size_t size)
1825 +{
1826 +
1827 +       return __ip_set_addip(index,
1828 +                             data + sizeof(struct ip_set_req_adt),
1829 +                             size - sizeof(struct ip_set_req_adt));
1830 +}
1831 +
1832 +static int
1833 +ip_set_delip(ip_set_id_t index,
1834 +            const void *data,
1835 +            size_t size)
1836 +{
1837 +       struct ip_set *set = ip_set_list[index];
1838 +       ip_set_ip_t ip;
1839 +       int res;
1840 +       
1841 +       IP_SET_ASSERT(set);
1842 +       write_lock_bh(&set->lock);
1843 +       res = set->type->delip(set,
1844 +                              data + sizeof(struct ip_set_req_adt),
1845 +                              size - sizeof(struct ip_set_req_adt),
1846 +                              &ip);
1847 +       write_unlock_bh(&set->lock);
1848 +
1849 +       return res;
1850 +}
1851 +
1852 +static int
1853 +ip_set_testip(ip_set_id_t index,
1854 +             const void *data,
1855 +             size_t size)
1856 +{
1857 +       struct ip_set *set = ip_set_list[index];
1858 +       ip_set_ip_t ip;
1859 +       int res;
1860 +
1861 +       IP_SET_ASSERT(set);
1862 +       res = __ip_set_testip(set,
1863 +                             data + sizeof(struct ip_set_req_adt),
1864 +                             size - sizeof(struct ip_set_req_adt),
1865 +                             &ip);
1866 +
1867 +       return (res > 0 ? -EEXIST : res);
1868 +}
1869 +
1870 +static int
1871 +ip_set_bindip(ip_set_id_t index,
1872 +             const void *data,
1873 +             size_t size)
1874 +{
1875 +       struct ip_set *set = ip_set_list[index];
1876 +       struct ip_set_req_bind *req_bind;
1877 +       ip_set_id_t binding;
1878 +       ip_set_ip_t ip;
1879 +       int res;
1880 +
1881 +       IP_SET_ASSERT(set);
1882 +       if (size < sizeof(struct ip_set_req_bind))
1883 +               return -EINVAL;
1884 +               
1885 +       req_bind = (struct ip_set_req_bind *) data;
1886 +       req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
1887 +
1888 +       if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
1889 +               /* Default binding of a set */
1890 +               char *binding_name;
1891 +               
1892 +               if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
1893 +                       return -EINVAL;
1894 +
1895 +               binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); 
1896 +               binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
1897 +
1898 +               binding = ip_set_find_byname(binding_name);
1899 +               if (binding == IP_SET_INVALID_ID)
1900 +                       return -ENOENT;
1901 +
1902 +               write_lock_bh(&ip_set_lock);
1903 +               /* Sets as binding values are referenced */
1904 +               if (set->binding != IP_SET_INVALID_ID)
1905 +                       __ip_set_put(set->binding);
1906 +               set->binding = binding;
1907 +               __ip_set_get(set->binding);
1908 +               write_unlock_bh(&ip_set_lock);
1909 +
1910 +               return 0;
1911 +       }
1912 +       binding = ip_set_find_byname(req_bind->binding);
1913 +       if (binding == IP_SET_INVALID_ID)
1914 +               return -ENOENT;
1915 +
1916 +       res = __ip_set_testip(set,
1917 +                             data + sizeof(struct ip_set_req_bind),
1918 +                             size - sizeof(struct ip_set_req_bind),
1919 +                             &ip);
1920 +       DP("set %s, ip: %u.%u.%u.%u, binding %s",
1921 +          set->name, HIPQUAD(ip), ip_set_list[binding]->name);
1922 +       
1923 +       if (res >= 0)
1924 +               res = ip_set_hash_add(set->id, ip, binding);
1925 +
1926 +       return res;
1927 +}
1928 +
1929 +#define FOREACH_SET_DO(fn, args...)                            \
1930 +({                                                             \
1931 +       ip_set_id_t __i;                                        \
1932 +       struct ip_set *__set;                                   \
1933 +                                                               \
1934 +       for (__i = 0; __i < ip_set_max; __i++) {                \
1935 +               __set = ip_set_list[__i];                       \
1936 +               if (__set != NULL)                              \
1937 +                       fn(__set , ##args);                     \
1938 +       }                                                       \
1939 +})
1940 +
1941 +static inline void
1942 +__set_hash_del_byid(struct ip_set_hash *set_hash, ip_set_id_t id)
1943 +{
1944 +       if (set_hash->id == id)
1945 +               __set_hash_del(set_hash);
1946 +}
1947 +
1948 +static inline void
1949 +__unbind_default(struct ip_set *set)
1950 +{
1951 +       if (set->binding != IP_SET_INVALID_ID) {
1952 +               /* Sets as binding values are referenced */
1953 +               __ip_set_put(set->binding);
1954 +               set->binding = IP_SET_INVALID_ID;
1955 +       }
1956 +}
1957 +
1958 +static int
1959 +ip_set_unbindip(ip_set_id_t index,
1960 +               const void *data,
1961 +               size_t size)
1962 +{
1963 +       struct ip_set *set;
1964 +       struct ip_set_req_bind *req_bind;
1965 +       ip_set_ip_t ip;
1966 +       int res;
1967 +
1968 +       DP("");
1969 +       if (size < sizeof(struct ip_set_req_bind))
1970 +               return -EINVAL;
1971 +               
1972 +       req_bind = (struct ip_set_req_bind *) data;
1973 +       req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
1974 +       
1975 +       DP("%u %s", index, req_bind->binding);
1976 +       if (index == IP_SET_INVALID_ID) {
1977 +               /* unbind :all: */
1978 +               if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
1979 +                       /* Default binding of sets */
1980 +                       write_lock_bh(&ip_set_lock);
1981 +                       FOREACH_SET_DO(__unbind_default);
1982 +                       write_unlock_bh(&ip_set_lock);
1983 +                       return 0;
1984 +               } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
1985 +                       /* Flush all bindings of all sets*/
1986 +                       write_lock_bh(&ip_set_lock);
1987 +                       FOREACH_HASH_RW_DO(__set_hash_del);
1988 +                       write_unlock_bh(&ip_set_lock);
1989 +                       return 0;
1990 +               }
1991 +               DP("unreachable reached!");
1992 +               return -EINVAL;
1993 +       }
1994 +       
1995 +       set = ip_set_list[index];
1996 +       IP_SET_ASSERT(set);
1997 +       if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
1998 +               /* Default binding of set */
1999 +               ip_set_id_t binding = ip_set_find_byindex(set->binding);
2000 +
2001 +               if (binding == IP_SET_INVALID_ID)
2002 +                       return -ENOENT;
2003 +                       
2004 +               write_lock_bh(&ip_set_lock);
2005 +               /* Sets in hash values are referenced */
2006 +               __ip_set_put(set->binding);
2007 +               set->binding = IP_SET_INVALID_ID;
2008 +               write_unlock_bh(&ip_set_lock);
2009 +
2010 +               return 0;
2011 +       } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
2012 +               /* Flush all bindings */
2013 +
2014 +               write_lock_bh(&ip_set_lock);
2015 +               FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
2016 +               write_unlock_bh(&ip_set_lock);
2017 +               return 0;
2018 +       }
2019 +       
2020 +       res = __ip_set_testip(set,
2021 +                             data + sizeof(struct ip_set_req_bind),
2022 +                             size - sizeof(struct ip_set_req_bind),
2023 +                             &ip);
2024 +
2025 +       DP("set %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip));
2026 +       if (res >= 0)
2027 +               res = ip_set_hash_del(set->id, ip);
2028 +
2029 +       return res;
2030 +}
2031 +
2032 +static int
2033 +ip_set_testbind(ip_set_id_t index,
2034 +               const void *data,
2035 +               size_t size)
2036 +{
2037 +       struct ip_set *set = ip_set_list[index];
2038 +       struct ip_set_req_bind *req_bind;
2039 +       ip_set_id_t binding;
2040 +       ip_set_ip_t ip;
2041 +       int res;
2042 +
2043 +       IP_SET_ASSERT(set);
2044 +       if (size < sizeof(struct ip_set_req_bind))
2045 +               return -EINVAL;
2046 +               
2047 +       req_bind = (struct ip_set_req_bind *) data;
2048 +       req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
2049 +
2050 +       if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
2051 +               /* Default binding of set */
2052 +               char *binding_name;
2053 +               
2054 +               if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
2055 +                       return -EINVAL;
2056 +
2057 +               binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); 
2058 +               binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
2059 +
2060 +               binding = ip_set_find_byname(binding_name);
2061 +               if (binding == IP_SET_INVALID_ID)
2062 +                       return -ENOENT;
2063 +               
2064 +               res = (set->binding == binding) ? -EEXIST : 0;
2065 +
2066 +               return res;
2067 +       }
2068 +       binding = ip_set_find_byname(req_bind->binding);
2069 +       if (binding == IP_SET_INVALID_ID)
2070 +               return -ENOENT;
2071 +               
2072 +       
2073 +       res = __ip_set_testip(set,
2074 +                             data + sizeof(struct ip_set_req_bind),
2075 +                             size - sizeof(struct ip_set_req_bind),
2076 +                             &ip);
2077 +       DP("set %s, ip: %u.%u.%u.%u, binding %s",
2078 +          set->name, HIPQUAD(ip), ip_set_list[binding]->name);
2079 +          
2080 +       if (res >= 0)
2081 +               res = (ip_set_find_in_hash(set->id, ip) == binding)
2082 +                       ? -EEXIST : 0;
2083 +
2084 +       return res;
2085 +}
2086 +
2087 +static struct ip_set_type *
2088 +find_set_type_rlock(const char *typename)
2089 +{
2090 +       struct ip_set_type *type;
2091 +       
2092 +       read_lock_bh(&ip_set_lock);
2093 +       type = find_set_type(typename);
2094 +       if (type == NULL)
2095 +               read_unlock_bh(&ip_set_lock);
2096 +
2097 +       return type;
2098 +}
2099 +
2100 +static int
2101 +find_free_id(const char *name,
2102 +            ip_set_id_t *index,
2103 +            ip_set_id_t *id)
2104 +{
2105 +       ip_set_id_t i;
2106 +
2107 +       *id = IP_SET_INVALID_ID;
2108 +       for (i = 0;  i < ip_set_max; i++) {
2109 +               if (ip_set_list[i] == NULL) {
2110 +                       if (*id == IP_SET_INVALID_ID)
2111 +                               *id = *index = i;
2112 +               } else if (strcmp(name, ip_set_list[i]->name) == 0)
2113 +                       /* Name clash */
2114 +                       return -EEXIST;
2115 +       }
2116 +       if (*id == IP_SET_INVALID_ID)
2117 +               /* No free slot remained */
2118 +               return -ERANGE;
2119 +       /* Check that index is usable as id (swapping) */
2120 +    check:     
2121 +       for (i = 0;  i < ip_set_max; i++) {
2122 +               if (ip_set_list[i] != NULL
2123 +                   && ip_set_list[i]->id == *id) {
2124 +                   *id = i;
2125 +                   goto check;
2126 +               }
2127 +       }
2128 +       return 0;
2129 +}
2130 +
2131 +/*
2132 + * Create a set. Returns 0 on success, an error code otherwise.
2133 + */
2134 +static int
2135 +ip_set_create(const char *name,
2136 +             const char *typename,
2137 +             ip_set_id_t restore,
2138 +             const void *data,
2139 +             size_t size)
2140 +{
2141 +       struct ip_set *set;
2142 +       ip_set_id_t index = 0, id;
2143 +       int res = 0;
2144 +
2145 +       DP("setname: %s, typename: %s, id: %u", name, typename, restore);
2146 +       /*
2147 +        * First, and without any locks, allocate and initialize
2148 +        * a normal base set structure.
2149 +        */
2150 +       set = kmalloc(sizeof(struct ip_set), GFP_KERNEL);
2151 +       if (!set)
2152 +               return -ENOMEM;
2153 +       set->lock = RW_LOCK_UNLOCKED;
2154 +       strncpy(set->name, name, IP_SET_MAXNAMELEN);
2155 +       set->name[IP_SET_MAXNAMELEN - 1] = '\0'; /* always terminated */
2156 +       set->binding = IP_SET_INVALID_ID;
2157 +       atomic_set(&set->ref, 0);
2158 +
2159 +       /*
2160 +        * Next, take the &ip_set_lock, check that we know the type,
2161 +        * and take a reference on the type, to make sure it
2162 +        * stays available while constructing our new set.
2163 +        *
2164 +        * After referencing the type, we drop the &ip_set_lock,
2165 +        * and let the new set construction run without locks.
2166 +        */
2167 +       set->type = find_set_type_rlock(typename);
2168 +       if (set->type == NULL) {
2169 +               /* Try loading the module. typename is caller-supplied:
2170 +                * bound the copy, never use it as a format string. */
2171 +               char modulename[IP_SET_MAXNAMELEN + sizeof("ip_set_")];
2172 +               strcpy(modulename, "ip_set_");
2173 +               strncat(modulename, typename, IP_SET_MAXNAMELEN);
2174 +               DP("try to load %s", modulename);
2175 +               request_module("%s", modulename);
2176 +               set->type = find_set_type_rlock(typename);
2177 +       }
2178 +       if (set->type == NULL) {
2179 +               ip_set_printk("no set type '%s', set '%s' not created",
2180 +                             typename, name);
2181 +               res = -ENOENT;
2182 +               goto out;
2183 +       }
2184 +       if (!try_module_get(set->type->me)) {
2185 +               read_unlock_bh(&ip_set_lock);
2186 +               res = -EFAULT;
2187 +               goto out;
2188 +       }
2189 +       read_unlock_bh(&ip_set_lock);
2190 +
2191 +       /*
2192 +        * Without holding any locks, create private part.
2193 +        */
2194 +       res = set->type->create(set, data, size);
2195 +       if (res != 0)
2196 +               goto put_out;
2197 +
2198 +       /*
2199 +        * Here, we have a valid, constructed set and res == 0.
2200 +        * &ip_set_lock again, find free id/index and check that
2201 +        * the name is not already in ip_set_list.
2202 +        */
2203 +       write_lock_bh(&ip_set_lock);
2204 +       if ((res = find_free_id(set->name, &index, &id)) != 0) {
2205 +               DP("no free id!");
2206 +               goto cleanup;
2207 +       }
2208 +
2209 +       /* Make sure restore gets the same index */
2210 +       if (restore != IP_SET_INVALID_ID && index != restore) {
2211 +               DP("Can't restore, sets are screwed up");
2212 +               res = -ERANGE;
2213 +               goto cleanup;
2214 +       }
2215 +        
2216 +       /*
2217 +        * Finally! Add our shiny new set to the list, and be done.
2218 +        */
2219 +       DP("create: '%s' created with index %u, id %u!", set->name, index, id);
2220 +       set->id = id;
2221 +       ip_set_list[index] = set;
2222 +       write_unlock_bh(&ip_set_lock);
2223 +       return res;
2224 +       
2225 +    cleanup:
2226 +       write_unlock_bh(&ip_set_lock);
2227 +       set->type->destroy(set);
2228 +    put_out:
2229 +       module_put(set->type->me);
2230 +    out:
2231 +       kfree(set);
2232 +       return res;
2233 +}
2234 +
2235 +/*
2236 + * Destroy a given existing set
2237 + */
2238 +static void
2239 +ip_set_destroy_set(ip_set_id_t index)
2240 +{
2241 +       struct ip_set *set = ip_set_list[index];
2242 +
2243 +       IP_SET_ASSERT(set);
2244 +       DP("set: %s",  set->name);
2245 +       write_lock_bh(&ip_set_lock);
2246 +       FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
2247 +       if (set->binding != IP_SET_INVALID_ID)
2248 +               __ip_set_put(set->binding);
2249 +       ip_set_list[index] = NULL;
2250 +       write_unlock_bh(&ip_set_lock);
2251 +
2252 +       /* Must call it without holding any lock */
2253 +       set->type->destroy(set);
2254 +       module_put(set->type->me);
2255 +       kfree(set);
2256 +}
2257 +
2258 +/*
2259 + * Destroy a set - or all sets
2260 + * Sets must not be referenced/used.
2261 + */
2262 +static int
2263 +ip_set_destroy(ip_set_id_t index)
2264 +{
2265 +       ip_set_id_t i;
2266 +
2267 +       /* ref modification always protected by the mutex */
2268 +       if (index != IP_SET_INVALID_ID) {
2269 +               if (atomic_read(&ip_set_list[index]->ref))
2270 +                       return -EBUSY;
2271 +               ip_set_destroy_set(index);
2272 +       } else {
2273 +               for (i = 0; i < ip_set_max; i++) {
2274 +                       if (ip_set_list[i] != NULL 
2275 +                           && (atomic_read(&ip_set_list[i]->ref)))
2276 +                               return -EBUSY;
2277 +               }
2278 +
2279 +               for (i = 0; i < ip_set_max; i++) {
2280 +                       if (ip_set_list[i] != NULL)
2281 +                               ip_set_destroy_set(i);
2282 +               }
2283 +       }
2284 +       return 0;
2285 +}
2286 +
2287 +static void
2288 +ip_set_flush_set(struct ip_set *set)
2289 +{
2290 +       DP("set: %s %u",  set->name, set->id);
2291 +
2292 +       write_lock_bh(&set->lock);
2293 +       set->type->flush(set);
2294 +       write_unlock_bh(&set->lock);
2295 +}
2296 +
2297 +/* 
2298 + * Flush data in a set - or in all sets
2299 + */
2300 +static int
2301 +ip_set_flush(ip_set_id_t index)
2302 +{
2303 +       if (index != IP_SET_INVALID_ID) {
2304 +               IP_SET_ASSERT(ip_set_list[index]);
2305 +               ip_set_flush_set(ip_set_list[index]);
2306 +       } else
2307 +               FOREACH_SET_DO(ip_set_flush_set);
2308 +
2309 +       return 0;
2310 +}
2311 +
2312 +/* Rename a set */
2313 +static int
2314 +ip_set_rename(ip_set_id_t index, const char *name)
2315 +{
2316 +       struct ip_set *set = ip_set_list[index];
2317 +       ip_set_id_t i;
2318 +       int res = 0;
2319 +
2320 +       DP("set: %s to %s",  set->name, name);
2321 +       write_lock_bh(&ip_set_lock);
2322 +       for (i = 0; i < ip_set_max; i++) {
2323 +               if (ip_set_list[i] != NULL
2324 +                   && strncmp(ip_set_list[i]->name, 
2325 +                              name,
2326 +                              IP_SET_MAXNAMELEN - 1) == 0) {
2327 +                       res = -EEXIST;
2328 +                       goto unlock;
2329 +               }
2330 +       }
2331 +       strncpy(set->name, name, IP_SET_MAXNAMELEN);
2332 +    unlock:
2333 +       write_unlock_bh(&ip_set_lock);
2334 +       return res;
2335 +}
2336 +
2337 +/*
2338 + * Swap two sets so that name/index points to the other. References
2339 + * are also swapped. Returns -ENOEXEC if the type features differ.
2340 + */
2341 +static int
2342 +ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index)
2343 +{
2344 +       struct ip_set *from = ip_set_list[from_index];
2345 +       struct ip_set *to = ip_set_list[to_index];
2346 +       char from_name[IP_SET_MAXNAMELEN];
2347 +       u_int32_t from_ref;
2348 +
2349 +       DP("set: %s to %s",  from->name, to->name);
2350 +       /* Features must not change. Artificial restriction. */
2351 +       if (from->type->features != to->type->features)
2352 +               return -ENOEXEC;
2353 +
2354 +       /* No magic here: ref munging protected by the mutex */ 
2355 +       write_lock_bh(&ip_set_lock);
2356 +       strncpy(from_name, from->name, IP_SET_MAXNAMELEN);
2357 +       from_ref = atomic_read(&from->ref);
2358 +
2359 +       strncpy(from->name, to->name, IP_SET_MAXNAMELEN);
2360 +       atomic_set(&from->ref, atomic_read(&to->ref));
2361 +       strncpy(to->name, from_name, IP_SET_MAXNAMELEN);
2362 +       atomic_set(&to->ref, from_ref);
2363 +       
2364 +       ip_set_list[from_index] = to;
2365 +       ip_set_list[to_index] = from;
2366 +       
2367 +       write_unlock_bh(&ip_set_lock);
2368 +       return 0;
2369 +}
2370 +
2371 +/*
2372 + * List set data
2373 + */
2374 +
2375 +static inline void
2376 +__set_hash_bindings_size_list(struct ip_set_hash *set_hash,
2377 +                             ip_set_id_t id, size_t *size)
2378 +{
2379 +       if (set_hash->id == id)
2380 +               *size += sizeof(struct ip_set_hash_list);
2381 +}
2382 +
2383 +static inline void
2384 +__set_hash_bindings_size_save(struct ip_set_hash *set_hash,
2385 +                             ip_set_id_t id, size_t *size)
2386 +{
2387 +       if (set_hash->id == id)
2388 +               *size += sizeof(struct ip_set_hash_save);
2389 +}
2390 +
2391 +static inline void
2392 +__set_hash_bindings(struct ip_set_hash *set_hash,
2393 +                   ip_set_id_t id, void *data, int *used)
2394 +{
2395 +       if (set_hash->id == id) {
2396 +               struct ip_set_hash_list *hash_list = 
2397 +                       (struct ip_set_hash_list *)(data + *used);
2398 +
2399 +               hash_list->ip = set_hash->ip;
2400 +               hash_list->binding = set_hash->binding;
2401 +               *used += sizeof(struct ip_set_hash_list);
2402 +       }
2403 +}
2404 +
2405 +static int ip_set_list_set(ip_set_id_t index,
2406 +                          void *data,
2407 +                          int *used,
2408 +                          int len)
2409 +{
2410 +       struct ip_set *set = ip_set_list[index];
2411 +       struct ip_set_list *set_list;
2412 +
2413 +       /* Pointer to our header */
2414 +       set_list = (struct ip_set_list *) (data + *used);
2415 +
2416 +       DP("set: %s, used: %d %p %p", set->name, *used, data, data + *used);
2417 +
2418 +       /* Get and ensure header size */
2419 +       if (*used + sizeof(struct ip_set_list) > len)
2420 +               goto not_enough_mem;
2421 +       *used += sizeof(struct ip_set_list);
2422 +
2423 +       read_lock_bh(&set->lock);
2424 +       /* Get and ensure set specific header size */
2425 +       set_list->header_size = set->type->header_size;
2426 +       if (*used + set_list->header_size > len)
2427 +               goto unlock_set;
2428 +
2429 +       /* Fill in the header */
2430 +       set_list->index = index;
2431 +       set_list->binding = set->binding;
2432 +       set_list->ref = atomic_read(&set->ref);
2433 +
2434 +       /* Fill in set spefific header data */
2435 +       set->type->list_header(set, data + *used);
2436 +       *used += set_list->header_size;
2437 +
2438 +       /* Get and ensure set specific members size */
2439 +       set_list->members_size = set->type->list_members_size(set);
2440 +       if (*used + set_list->members_size > len)
2441 +               goto unlock_set;
2442 +
2443 +       /* Fill in set spefific members data */
2444 +       set->type->list_members(set, data + *used);
2445 +       *used += set_list->members_size;
2446 +       read_unlock_bh(&set->lock);
2447 +
2448 +       /* Bindings */
2449 +
2450 +       /* Get and ensure set specific bindings size */
2451 +       set_list->bindings_size = 0;
2452 +       FOREACH_HASH_DO(__set_hash_bindings_size_list,
2453 +                       set->id, &set_list->bindings_size);
2454 +       if (*used + set_list->bindings_size > len)
2455 +               goto not_enough_mem;
2456 +
2457 +       /* Fill in set spefific bindings data */
2458 +       FOREACH_HASH_DO(__set_hash_bindings, set->id, data, used);
2459 +       
2460 +       return 0;
2461 +
2462 +    unlock_set:
2463 +       read_unlock_bh(&set->lock);
2464 +    not_enough_mem:
2465 +       DP("not enough mem, try again");
2466 +       return -EAGAIN;
2467 +}
2468 +
2469 +/*
2470 + * Save sets
2471 + */
2472 +static int ip_set_save_set(ip_set_id_t index,
2473 +                          void *data,
2474 +                          int *used,
2475 +                          int len)
2476 +{
2477 +       struct ip_set *set;
2478 +       struct ip_set_save *set_save;
2479 +
2480 +       /* Pointer to our header */
2481 +       set_save = (struct ip_set_save *) (data + *used);
2482 +
2483 +       /* Get and ensure header size */
2484 +       if (*used + sizeof(struct ip_set_save) > len)
2485 +               goto not_enough_mem;
2486 +       *used += sizeof(struct ip_set_save);
2487 +
2488 +       set = ip_set_list[index];
2489 +       DP("set: %s, used: %u(%u) %p %p", set->name, *used, len, 
2490 +          data, data + *used);
2491 +
2492 +       read_lock_bh(&set->lock);
2493 +       /* Get and ensure set specific header size */
2494 +       set_save->header_size = set->type->header_size;
2495 +       if (*used + set_save->header_size > len)
2496 +               goto unlock_set;
2497 +
2498 +       /* Fill in the header */
2499 +       set_save->index = index;
2500 +       set_save->binding = set->binding;
2501 +
2502 +       /* Fill in set spefific header data */
2503 +       set->type->list_header(set, data + *used);
2504 +       *used += set_save->header_size;
2505 +
2506 +       DP("set header filled: %s, used: %u(%u) %p %p", set->name, *used,
2507 +          set_save->header_size, data, data + *used);
2508 +       /* Get and ensure set specific members size */
2509 +       set_save->members_size = set->type->list_members_size(set);
2510 +       if (*used + set_save->members_size > len)
2511 +               goto unlock_set;
2512 +
2513 +       /* Fill in set spefific members data */
2514 +       set->type->list_members(set, data + *used);
2515 +       *used += set_save->members_size;
2516 +       read_unlock_bh(&set->lock);
2517 +       DP("set members filled: %s, used: %u(%u) %p %p", set->name, *used,
2518 +          set_save->members_size, data, data + *used);
2519 +       return 0;
2520 +
2521 +    unlock_set:
2522 +       read_unlock_bh(&set->lock);
2523 +    not_enough_mem:
2524 +       DP("not enough mem, try again");
2525 +       return -EAGAIN;
2526 +}
2527 +
2528 +static inline void
2529 +__set_hash_save_bindings(struct ip_set_hash *set_hash,
2530 +                        ip_set_id_t id,
2531 +                        void *data,
2532 +                        int *used,
2533 +                        int len,
2534 +                        int *res)
2535 +{
2536 +       if (*res == 0
2537 +           && (id == IP_SET_INVALID_ID || set_hash->id == id)) {
2538 +               struct ip_set_hash_save *hash_save = 
2539 +                       (struct ip_set_hash_save *)(data + *used);
2540 +               /* Ensure bindings size */
2541 +               if (*used + sizeof(struct ip_set_hash_save) > len) {
2542 +                       *res = -ENOMEM;
2543 +                       return;
2544 +               }
2545 +               hash_save->id = set_hash->id;
2546 +               hash_save->ip = set_hash->ip;
2547 +               hash_save->binding = set_hash->binding;
2548 +               *used += sizeof(struct ip_set_hash_save);
2549 +       }
2550 +}
2551 +
2552 +static int ip_set_save_bindings(ip_set_id_t index,
2553 +                               void *data,
2554 +                               int *used,
2555 +                               int len)
2556 +{
2557 +       int res = 0;
2558 +       struct ip_set_save *set_save;
2559 +
2560 +       DP("used %u, len %u", *used, len);
2561 +       /* Get and ensure header size */
2562 +       if (*used + sizeof(struct ip_set_save) > len)
2563 +               return -ENOMEM;
2564 +
2565 +       /* Marker */
2566 +       set_save = (struct ip_set_save *) (data + *used);
2567 +       set_save->index = IP_SET_INVALID_ID;
2568 +       set_save->header_size = 0;
2569 +       set_save->members_size = 0;
2570 +       *used += sizeof(struct ip_set_save);
2571 +
2572 +       DP("marker added used %u, len %u", *used, len);
2573 +       /* Fill in bindings data */
2574 +       if (index != IP_SET_INVALID_ID)
2575 +               /* Sets are identified by id in hash */
2576 +               index = ip_set_list[index]->id;
2577 +       FOREACH_HASH_DO(__set_hash_save_bindings, index, data, used, len, &res);
2578 +
2579 +       return res;     
2580 +}
2581 +
2582 +/*
2583 + * Restore sets
2584 + */
2585 +static int ip_set_restore(void *data,
2586 +                         int len)
2587 +{
2588 +       int res = 0;
2589 +       int line = 0, used = 0, members_size;
2590 +       struct ip_set *set;
2591 +       struct ip_set_hash_save *hash_save;
2592 +       struct ip_set_restore *set_restore;
2593 +       ip_set_id_t index;
2594 +
2595 +       /* Loop to restore sets */
2596 +       while (1) {
2597 +               line++;
2598 +               
2599 +               DP("%u %u %u", used, sizeof(struct ip_set_restore), len);
2600 +               /* Get and ensure header size */
2601 +               if (used + sizeof(struct ip_set_restore) > len)
2602 +                       return line;
2603 +               set_restore = (struct ip_set_restore *) (data + used);
2604 +               used += sizeof(struct ip_set_restore);
2605 +
2606 +               /* Ensure data size */
2607 +               if (used 
2608 +                   + set_restore->header_size 
2609 +                   + set_restore->members_size > len)
2610 +                       return line;
2611 +
2612 +               /* Check marker */
2613 +               if (set_restore->index == IP_SET_INVALID_ID) {
2614 +                       line--;
2615 +                       goto bindings;
2616 +               }
2617 +               
2618 +               /* Try to create the set */
2619 +               DP("restore %s %s", set_restore->name, set_restore->typename);
2620 +               res = ip_set_create(set_restore->name,
2621 +                                   set_restore->typename,
2622 +                                   set_restore->index,
2623 +                                   data + used,
2624 +                                   set_restore->header_size);
2625 +               
2626 +               if (res != 0)
2627 +                       return line;
2628 +               used += set_restore->header_size;
2629 +
2630 +               index = ip_set_find_byindex(set_restore->index);
2631 +               DP("index %u, restore_index %u", index, set_restore->index);
2632 +               if (index != set_restore->index)
2633 +                       return line;
2634 +               /* Try to restore members data */
2635 +               set = ip_set_list[index];
2636 +               members_size = 0;
2637 +               DP("members_size %u reqsize %u",
2638 +                  set_restore->members_size, set->type->reqsize);
2639 +               while (members_size + set->type->reqsize <=
2640 +                      set_restore->members_size) {
2641 +                       line++;
2642 +                       DP("members: %u, line %u", members_size, line);
2643 +                       res = __ip_set_addip(index,
2644 +                                          data + used + members_size,
2645 +                                          set->type->reqsize);
2646 +                       if (!(res == 0 || res == -EEXIST)) 
2647 +                               return line;
2648 +                       members_size += set->type->reqsize;
2649 +               }
2650 +
2651 +               DP("members_size %u  %u",
2652 +                  set_restore->members_size, members_size);
2653 +               if (members_size != set_restore->members_size)
2654 +                       return line++;
2655 +               used += set_restore->members_size;              
2656 +       }
2657 +       
2658 +   bindings:
2659 +       /* Loop to restore bindings */
2660 +       while (used < len) {
2661 +               line++;
2662 +
2663 +               DP("restore binding, line %u", line);           
2664 +               /* Get and ensure size */
2665 +               if (used + sizeof(struct ip_set_hash_save) > len)
2666 +                       return line;
2667 +               hash_save = (struct ip_set_hash_save *) (data + used);
2668 +               used += sizeof(struct ip_set_hash_save);
2669 +               
2670 +               /* hash_save->id is used to store the index */
2671 +               index = ip_set_find_byindex(hash_save->id);
2672 +               DP("restore binding index %u, id %u, %u -> %u",
2673 +                  index, hash_save->id, hash_save->ip, hash_save->binding);            
2674 +               if (index != hash_save->id)
2675 +                       return line;
2676 +               if (ip_set_find_byindex(hash_save->binding) == IP_SET_INVALID_ID) {
2677 +                       DP("corrupt binding set index %u", hash_save->binding);
2678 +                       return line;
2679 +               }
2680 +               set = ip_set_list[hash_save->id];
2681 +               /* Null valued IP means default binding */
2682 +               if (hash_save->ip)
2683 +                       res = ip_set_hash_add(set->id, 
2684 +                                             hash_save->ip,
2685 +                                             hash_save->binding);
2686 +               else {
2687 +                       IP_SET_ASSERT(set->binding == IP_SET_INVALID_ID);
2688 +                       write_lock_bh(&ip_set_lock);
2689 +                       set->binding = hash_save->binding;
2690 +                       __ip_set_get(set->binding);
2691 +                       write_unlock_bh(&ip_set_lock);
2692 +                       DP("default binding: %u", set->binding);
2693 +               }
2694 +               if (res != 0)
2695 +                       return line;
2696 +       }
2697 +       if (used != len)
2698 +               return line;
2699 +       
2700 +       return 0;       
2701 +}
2702 +
2703 +/*
2704 + * setsockopt() handler for SO_IP_SET.
2705 + *
2706 + * Copies the len-byte request from user into a kernel buffer and dispatches
2707 + * on its leading op word while holding ip_set_app_mutex.  Returns 0 on
2708 + * success (positive helper results are folded to 0) or a negative errno.
2709 + */
2710 +static int
2711 +ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len)
2712 +{
2713 +       void *data;
2714 +       int res = 0;            /* Assume OK */
2715 +       unsigned *op;
2716 +       struct ip_set_req_adt *req_adt;
2717 +       ip_set_id_t index = IP_SET_INVALID_ID;
2718 +       int (*adtfn)(ip_set_id_t index, const void *data, size_t size);
2719 +       struct fn_table {
2720 +               int (*fn)(ip_set_id_t index, const void *data, size_t size);
2721 +       } adtfn_table[] =       /* indexed by op - IP_SET_OP_ADD_IP */
2722 +       { { ip_set_addip }, { ip_set_delip }, { ip_set_testip},
2723 +         { ip_set_bindip}, { ip_set_unbindip }, { ip_set_testbind },
2724 +       };
2725 +
2726 +       DP("optval=%d, user=%p, len=%d", optval, user, len);
2727 +       if (!capable(CAP_NET_ADMIN))
2728 +               return -EPERM;
2729 +       if (optval != SO_IP_SET)
2730 +               return -EBADF;
2731 +       if (len <= sizeof(unsigned)) {
2732 +               ip_set_printk("short userdata (want >%zu, got %u)",
2733 +                             sizeof(unsigned), len);
2734 +               return -EINVAL;
2735 +       }
2736 +       data = vmalloc(len);
2737 +       if (!data) {
2738 +               DP("out of mem for %u bytes", len);
2739 +               return -ENOMEM;
2740 +       }
2741 +       if (copy_from_user(data, user, len) != 0) {
2742 +               res = -EFAULT;
2743 +               goto out_free;  /* mutex not taken: skip the up() */
2744 +       }
2745 +       if (down_interruptible(&ip_set_app_mutex)) {
2746 +               res = -EINTR;
2747 +               goto out_free;  /* mutex not taken: skip the up() */
2748 +       }
2749 +
2750 +       op = (unsigned *)data;
2751 +       DP("op=%x", *op);
2752 +
2753 +       if (*op < IP_SET_OP_VERSION) {
2754 +               /* Check the version at the beginning of operations */
2755 +               struct ip_set_req_version *req_version = data;
2756 +               if (req_version->version != IP_SET_PROTOCOL_VERSION) {
2757 +                       res = -EPROTO;
2758 +                       goto done;
2759 +               }
2760 +       }
2761 +
2762 +       switch (*op) {
2763 +       case IP_SET_OP_CREATE:{
2764 +               struct ip_set_req_create *req_create = data;
2765 +
2766 +               if (len < sizeof(struct ip_set_req_create)) {
2767 +                       ip_set_printk("short CREATE data (want >=%zu, got %u)",
2768 +                                     sizeof(struct ip_set_req_create), len);
2769 +                       res = -EINVAL;
2770 +                       goto done;
2771 +               }
2772 +               req_create->name[IP_SET_MAXNAMELEN - 1] = '\0';
2773 +               req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0';
2774 +               res = ip_set_create(req_create->name,
2775 +                                   req_create->typename,
2776 +                                   IP_SET_INVALID_ID,
2777 +                                   data + sizeof(struct ip_set_req_create),
2778 +                                   len - sizeof(struct ip_set_req_create));
2779 +               goto done;
2780 +       }
2781 +       case IP_SET_OP_DESTROY:{
2782 +               struct ip_set_req_std *req_destroy = data;
2783 +
2784 +               if (len != sizeof(struct ip_set_req_std)) {
2785 +                       ip_set_printk("invalid DESTROY data (want %zu, got %u)",
2786 +                                     sizeof(struct ip_set_req_std), len);
2787 +                       res = -EINVAL;
2788 +                       goto done;
2789 +               }
2790 +               if (strcmp(req_destroy->name, IPSET_TOKEN_ALL) == 0) {
2791 +                       /* Destroy all sets */
2792 +                       index = IP_SET_INVALID_ID;
2793 +               } else {
2794 +                       req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0';
2795 +                       index = ip_set_find_byname(req_destroy->name);
2796 +
2797 +                       if (index == IP_SET_INVALID_ID) {
2798 +                               res = -ENOENT;
2799 +                               goto done;
2800 +                       }
2801 +               }
2802 +
2803 +               res = ip_set_destroy(index);
2804 +               goto done;
2805 +       }
2806 +       case IP_SET_OP_FLUSH:{
2807 +               struct ip_set_req_std *req_flush = data;
2808 +
2809 +               if (len != sizeof(struct ip_set_req_std)) {
2810 +                       ip_set_printk("invalid FLUSH data (want %zu, got %u)",
2811 +                                     sizeof(struct ip_set_req_std), len);
2812 +                       res = -EINVAL;
2813 +                       goto done;
2814 +               }
2815 +               if (strcmp(req_flush->name, IPSET_TOKEN_ALL) == 0) {
2816 +                       /* Flush all sets */
2817 +                       index = IP_SET_INVALID_ID;
2818 +               } else {
2819 +                       req_flush->name[IP_SET_MAXNAMELEN - 1] = '\0';
2820 +                       index = ip_set_find_byname(req_flush->name);
2821 +
2822 +                       if (index == IP_SET_INVALID_ID) {
2823 +                               res = -ENOENT;
2824 +                               goto done;
2825 +                       }
2826 +               }
2827 +               res = ip_set_flush(index);
2828 +               goto done;
2829 +       }
2830 +       case IP_SET_OP_RENAME:{
2831 +               struct ip_set_req_create *req_rename = data;
2832 +
2833 +               if (len != sizeof(struct ip_set_req_create)) {
2834 +                       ip_set_printk("invalid RENAME data (want %zu, got %u)",
2835 +                                     sizeof(struct ip_set_req_create), len);
2836 +                       res = -EINVAL;
2837 +                       goto done;
2838 +               }
2839 +
2840 +               req_rename->name[IP_SET_MAXNAMELEN - 1] = '\0';
2841 +               req_rename->typename[IP_SET_MAXNAMELEN - 1] = '\0';
2842 +
2843 +               index = ip_set_find_byname(req_rename->name);
2844 +               if (index == IP_SET_INVALID_ID) {
2845 +                       res = -ENOENT;
2846 +                       goto done;
2847 +               }
2848 +               res = ip_set_rename(index, req_rename->typename);
2849 +               goto done;
2850 +       }
2851 +       case IP_SET_OP_SWAP:{
2852 +               struct ip_set_req_create *req_swap = data;
2853 +               ip_set_id_t to_index;   /* set named in req_swap->typename */
2854 +
2855 +               if (len != sizeof(struct ip_set_req_create)) {
2856 +                       ip_set_printk("invalid SWAP data (want %zu, got %u)",
2857 +                                     sizeof(struct ip_set_req_create), len);
2858 +                       res = -EINVAL;
2859 +                       goto done;
2860 +               }
2861 +
2862 +               req_swap->name[IP_SET_MAXNAMELEN - 1] = '\0';
2863 +               req_swap->typename[IP_SET_MAXNAMELEN - 1] = '\0';
2864 +
2865 +               index = ip_set_find_byname(req_swap->name);
2866 +               if (index == IP_SET_INVALID_ID) {
2867 +                       res = -ENOENT;
2868 +                       goto done;
2869 +               }
2870 +               to_index = ip_set_find_byname(req_swap->typename);
2871 +               if (to_index == IP_SET_INVALID_ID) {
2872 +                       res = -ENOENT;
2873 +                       goto done;
2874 +               }
2875 +               res = ip_set_swap(index, to_index);
2876 +               goto done;
2877 +       }
2878 +       default:
2879 +               break;  /* Set identified by id */
2880 +       }
2881 +
2882 +       /* There we may have add/del/test/bind/unbind/test_bind operations */
2883 +       if (*op < IP_SET_OP_ADD_IP || *op > IP_SET_OP_TEST_BIND_SET) {
2884 +               res = -EBADMSG;
2885 +               goto done;
2886 +       }
2887 +       adtfn = adtfn_table[*op - IP_SET_OP_ADD_IP].fn;
2888 +
2889 +       if (len < sizeof(struct ip_set_req_adt)) {
2890 +               ip_set_printk("short data in adt request (want >=%zu, got %u)",
2891 +                             sizeof(struct ip_set_req_adt), len);
2892 +               res = -EINVAL;
2893 +               goto done;
2894 +       }
2895 +       req_adt = (struct ip_set_req_adt *) data;
2896 +
2897 +       /* -U :all: :all:|:default: uses IP_SET_INVALID_ID */
2898 +       if (!(*op == IP_SET_OP_UNBIND_SET
2899 +             && req_adt->index == IP_SET_INVALID_ID)) {
2900 +               index = ip_set_find_byindex(req_adt->index);
2901 +               if (index == IP_SET_INVALID_ID) {
2902 +                       res = -ENOENT;
2903 +                       goto done;
2904 +               }
2905 +       }
2906 +       res = adtfn(index, data, len);
2907 +
2908 +    done:
2909 +       up(&ip_set_app_mutex);
2910 +    out_free:
2911 +       vfree(data);
2912 +       if (res > 0)
2913 +               res = 0;
2914 +       DP("final result %d", res);
2915 +       return res;
2916 +}
2917 +
2918 +/*
2919 + * getsockopt() handler for SO_IP_SET.
2920 + *
2921 + * Copies the *len-byte request from user, answers the op in its leading
2922 + * word while holding ip_set_app_mutex and, for most ops, copies copylen
2923 + * bytes of the buffer back.  Returns 0 or a negative errno.
2924 + */
2925 +static int
2926 +ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len)
2927 +{
2928 +       int res = 0;
2929 +       unsigned *op;
2930 +       ip_set_id_t index = IP_SET_INVALID_ID;
2931 +       void *data;
2932 +       int copylen = *len;
2933 +
2934 +       DP("optval=%d, user=%p, len=%d", optval, user, *len);
2935 +       if (!capable(CAP_NET_ADMIN))
2936 +               return -EPERM;
2937 +       if (optval != SO_IP_SET)
2938 +               return -EBADF;
2939 +       if (*len < sizeof(unsigned)) {
2940 +               ip_set_printk("short userdata (want >=%zu, got %d)",
2941 +                             sizeof(unsigned), *len);
2942 +               return -EINVAL;
2943 +       }
2944 +       data = vmalloc(*len);
2945 +       if (!data) {
2946 +               DP("out of mem for %d bytes", *len);
2947 +               return -ENOMEM;
2948 +       }
2949 +       if (copy_from_user(data, user, *len) != 0) {
2950 +               res = -EFAULT;
2951 +               goto out_free;  /* mutex not taken: skip the up() */
2952 +       }
2953 +       if (down_interruptible(&ip_set_app_mutex)) {
2954 +               res = -EINTR;
2955 +               goto out_free;  /* mutex not taken: skip the up() */
2956 +       }
2957 +
2958 +       op = (unsigned *) data;
2959 +       DP("op=%x", *op);
2960 +
2961 +       if (*op < IP_SET_OP_VERSION) {
2962 +               /* Check the version at the beginning of operations */
2963 +               struct ip_set_req_version *req_version = data;
2964 +               if (req_version->version != IP_SET_PROTOCOL_VERSION) {
2965 +                       res = -EPROTO;
2966 +                       goto done;
2967 +               }
2968 +       }
2969 +
2970 +       switch (*op) {
2971 +       case IP_SET_OP_VERSION: {
2972 +               struct ip_set_req_version *req_version = data;
2973 +
2974 +               if (*len != sizeof(struct ip_set_req_version)) {
2975 +                       ip_set_printk("invalid VERSION (want %zu, got %d)",
2976 +                                     sizeof(struct ip_set_req_version),
2977 +                                     *len);
2978 +                       res = -EINVAL;
2979 +                       goto done;
2980 +               }
2981 +
2982 +               req_version->version = IP_SET_PROTOCOL_VERSION;
2983 +               if (copy_to_user(user, req_version, sizeof(*req_version)))
2984 +                       res = -EFAULT;  /* partial copy is a failure */
2985 +               goto done;
2986 +       }
2987 +       case IP_SET_OP_GET_BYNAME: {
2988 +               struct ip_set_req_get_set *req_get = data;
2989 +
2990 +               if (*len != sizeof(struct ip_set_req_get_set)) {
2991 +                       ip_set_printk("invalid GET_BYNAME (want %zu, got %d)",
2992 +                                     sizeof(struct ip_set_req_get_set), *len);
2993 +                       res = -EINVAL;
2994 +                       goto done;
2995 +               }
2996 +               req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
2997 +               index = ip_set_find_byname(req_get->set.name);
2998 +               req_get->set.index = index;
2999 +               goto copy;
3000 +       }
3001 +       case IP_SET_OP_GET_BYINDEX: {
3002 +               struct ip_set_req_get_set *req_get = data;
3003 +
3004 +               if (*len != sizeof(struct ip_set_req_get_set)) {
3005 +                       ip_set_printk("invalid GET_BYINDEX (want %zu, got %d)",
3006 +                                     sizeof(struct ip_set_req_get_set), *len);
3007 +                       res = -EINVAL;
3008 +                       goto done;
3009 +               }
3010 +               req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
3011 +               index = ip_set_find_byindex(req_get->set.index);
3012 +               strncpy(req_get->set.name,
3013 +                       index == IP_SET_INVALID_ID ? ""
3014 +                       : ip_set_list[index]->name, IP_SET_MAXNAMELEN);
3015 +               goto copy;
3016 +       }
3017 +       case IP_SET_OP_ADT_GET: {
3018 +               struct ip_set_req_adt_get *req_get = data;
3019 +
3020 +               if (*len != sizeof(struct ip_set_req_adt_get)) {
3021 +                       ip_set_printk("invalid ADT_GET (want %zu, got %d)",
3022 +                                     sizeof(struct ip_set_req_adt_get), *len);
3023 +                       res = -EINVAL;
3024 +                       goto done;
3025 +               }
3026 +               req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
3027 +               index = ip_set_find_byname(req_get->set.name);
3028 +               if (index != IP_SET_INVALID_ID) {
3029 +                       req_get->set.index = index;
3030 +                       strncpy(req_get->typename,
3031 +                               ip_set_list[index]->type->typename,
3032 +                               IP_SET_MAXNAMELEN - 1);
3033 +               } else {
3034 +                       res = -ENOENT;
3035 +                       goto done;
3036 +               }
3037 +               goto copy;
3038 +       }
3039 +       case IP_SET_OP_MAX_SETS: {
3040 +               struct ip_set_req_max_sets *req_max_sets = data;
3041 +               ip_set_id_t i;
3042 +
3043 +               if (*len != sizeof(struct ip_set_req_max_sets)) {
3044 +                       ip_set_printk("invalid MAX_SETS (want %zu, got %d)",
3045 +                                     sizeof(struct ip_set_req_max_sets), *len);
3046 +                       res = -EINVAL;
3047 +                       goto done;
3048 +               }
3049 +
3050 +               if (strcmp(req_max_sets->set.name, IPSET_TOKEN_ALL) == 0) {
3051 +                       req_max_sets->set.index = IP_SET_INVALID_ID;
3052 +               } else {
3053 +                       req_max_sets->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
3054 +                       req_max_sets->set.index =
3055 +                               ip_set_find_byname(req_max_sets->set.name);
3056 +                       if (req_max_sets->set.index == IP_SET_INVALID_ID) {
3057 +                               res = -ENOENT;
3058 +                               goto done;
3059 +                       }
3060 +               }
3061 +               req_max_sets->max_sets = ip_set_max;
3062 +               req_max_sets->sets = 0;
3063 +               for (i = 0; i < ip_set_max; i++) {
3064 +                       if (ip_set_list[i] != NULL)
3065 +                               req_max_sets->sets++;
3066 +               }
3067 +               goto copy;
3068 +       }
3069 +       case IP_SET_OP_LIST_SIZE:
3070 +       case IP_SET_OP_SAVE_SIZE: {
3071 +               struct ip_set_req_setnames *req_setnames = data;
3072 +               struct ip_set_name_list *name_list;
3073 +               struct ip_set *set;
3074 +               ip_set_id_t i;
3075 +               int used;
3076 +
3077 +               if (*len < sizeof(struct ip_set_req_setnames)) {
3078 +                       ip_set_printk("short LIST_SIZE (want >=%zu, got %d)",
3079 +                                     sizeof(struct ip_set_req_setnames), *len);
3080 +                       res = -EINVAL;
3081 +                       goto done;
3082 +               }
3083 +
3084 +               req_setnames->size = 0;
3085 +               used = sizeof(struct ip_set_req_setnames);
3086 +               for (i = 0; i < ip_set_max; i++) {
3087 +                       if (ip_set_list[i] == NULL)
3088 +                               continue;
3089 +                       name_list = (struct ip_set_name_list *)
3090 +                               (data + used);
3091 +                       used += sizeof(struct ip_set_name_list);
3092 +                       if (used > copylen) {
3093 +                               res = -EAGAIN;
3094 +                               goto done;
3095 +                       }
3096 +                       set = ip_set_list[i];
3097 +                       /* Fill in index, name, etc. */
3098 +                       name_list->index = i;
3099 +                       name_list->id = set->id;
3100 +                       strncpy(name_list->name,
3101 +                               set->name,
3102 +                               IP_SET_MAXNAMELEN - 1);
3103 +                       strncpy(name_list->typename,
3104 +                               set->type->typename,
3105 +                               IP_SET_MAXNAMELEN - 1);
3106 +                       DP("filled %s of type %s, index %u\n",
3107 +                          name_list->name, name_list->typename,
3108 +                          name_list->index);
3109 +                       if (!(req_setnames->index == IP_SET_INVALID_ID
3110 +                             || req_setnames->index == i))
3111 +                             continue;
3112 +                       /* Update size */
3113 +                       switch (*op) {
3114 +                       case IP_SET_OP_LIST_SIZE: {
3115 +                               req_setnames->size += sizeof(struct ip_set_list)
3116 +                                       + set->type->header_size
3117 +                                       + set->type->list_members_size(set);
3118 +                               /* Sets are identified by id in the hash */
3119 +                               FOREACH_HASH_DO(__set_hash_bindings_size_list,
3120 +                                               set->id, &req_setnames->size);
3121 +                               break;
3122 +                       }
3123 +                       case IP_SET_OP_SAVE_SIZE: {
3124 +                               req_setnames->size += sizeof(struct ip_set_save)
3125 +                                       + set->type->header_size
3126 +                                       + set->type->list_members_size(set);
3127 +                               FOREACH_HASH_DO(__set_hash_bindings_size_save,
3128 +                                               set->id, &req_setnames->size);
3129 +                               break;
3130 +                       }
3131 +                       default:
3132 +                               break;
3133 +                       }
3134 +               }
3135 +               if (copylen != used) {
3136 +                       res = -EAGAIN;
3137 +                       goto done;
3138 +               }
3139 +               goto copy;
3140 +       }
3141 +       case IP_SET_OP_LIST: {
3142 +               struct ip_set_req_list *req_list = data;
3143 +               ip_set_id_t i;
3144 +               int used;
3145 +
3146 +               if (*len < sizeof(struct ip_set_req_list)) {
3147 +                       ip_set_printk("short LIST (want >=%zu, got %d)",
3148 +                                     sizeof(struct ip_set_req_list), *len);
3149 +                       res = -EINVAL;
3150 +                       goto done;
3151 +               }
3152 +               index = req_list->index;
3153 +               if (index != IP_SET_INVALID_ID
3154 +                   && ip_set_find_byindex(index) != index) {
3155 +                       res = -ENOENT;
3156 +                       goto done;
3157 +               }
3158 +               used = 0;
3159 +               if (index == IP_SET_INVALID_ID) {
3160 +                       /* List all sets */
3161 +                       for (i = 0; i < ip_set_max && res == 0; i++) {
3162 +                               if (ip_set_list[i] != NULL)
3163 +                                       res = ip_set_list_set(i, data, &used, *len);
3164 +                       }
3165 +               } else {
3166 +                       /* List an individual set */
3167 +                       res = ip_set_list_set(index, data, &used, *len);
3168 +               }
3169 +               if (res != 0)
3170 +                       goto done;
3171 +               else if (copylen != used) {
3172 +                       res = -EAGAIN;
3173 +                       goto done;
3174 +               }
3175 +               goto copy;
3176 +       }
3177 +       case IP_SET_OP_SAVE: {
3178 +               struct ip_set_req_list *req_save = data;
3179 +               ip_set_id_t i;
3180 +               int used;
3181 +
3182 +               if (*len < sizeof(struct ip_set_req_list)) {
3183 +                       ip_set_printk("short SAVE (want >=%zu, got %d)",
3184 +                                     sizeof(struct ip_set_req_list), *len);
3185 +                       res = -EINVAL;
3186 +                       goto done;
3187 +               }
3188 +               index = req_save->index;
3189 +               if (index != IP_SET_INVALID_ID
3190 +                   && ip_set_find_byindex(index) != index) {
3191 +                       res = -ENOENT;
3192 +                       goto done;
3193 +               }
3194 +               used = 0;
3195 +               if (index == IP_SET_INVALID_ID) {
3196 +                       /* Save all sets */
3197 +                       for (i = 0; i < ip_set_max && res == 0; i++) {
3198 +                               if (ip_set_list[i] != NULL)
3199 +                                       res = ip_set_save_set(i, data, &used, *len);
3200 +                       }
3201 +               } else {
3202 +                       /* Save an individual set */
3203 +                       res = ip_set_save_set(index, data, &used, *len);
3204 +               }
3205 +               if (res == 0)
3206 +                       res = ip_set_save_bindings(index, data, &used, *len);
3207 +
3208 +               if (res != 0)
3209 +                       goto done;
3210 +               else if (copylen != used) {
3211 +                       res = -EAGAIN;
3212 +                       goto done;
3213 +               }
3214 +               goto copy;
3215 +       }
3216 +       case IP_SET_OP_RESTORE: {
3217 +               struct ip_set_req_setnames *req_restore = data;
3218 +               int line;
3219 +
3220 +               if (*len < sizeof(struct ip_set_req_setnames)
3221 +                   || *len != req_restore->size) {
3222 +                       ip_set_printk("invalid RESTORE (want =%zu, got %d)",
3223 +                                     req_restore->size, *len);
3224 +                       res = -EINVAL;
3225 +                       goto done;
3226 +               }
3227 +               line = ip_set_restore(data + sizeof(struct ip_set_req_setnames),
3228 +                                     req_restore->size - sizeof(struct ip_set_req_setnames));
3229 +               DP("ip_set_restore: %u", line);
3230 +               if (line != 0) {
3231 +                       res = -EAGAIN;
3232 +                       req_restore->size = line;
3233 +                       copylen = sizeof(struct ip_set_req_setnames);
3234 +                       goto copy;
3235 +               }
3236 +               goto done;
3237 +       }
3238 +       default:
3239 +               res = -EBADMSG;
3240 +               goto done;
3241 +       }       /* end of switch(op) */
3242 +
3243 +    copy:
3244 +       DP("set %s, copylen %u", index != IP_SET_INVALID_ID
3245 +                                && ip_set_list[index]
3246 +                    ? ip_set_list[index]->name
3247 +                    : ":all:", copylen);
3248 +       /* Overwrites any earlier res (e.g. RESTORE's -EAGAIN, whose line
3249 +        * number travels in the buffer); an incomplete copy is -EFAULT */
3250 +       res = copy_to_user(user, data, copylen) ? -EFAULT : 0;
3251 +
3252 +    done:
3253 +       up(&ip_set_app_mutex);
3254 +    out_free:
3255 +       vfree(data);
3256 +       if (res > 0)
3257 +               res = 0;
3258 +       DP("final result %d", res);
3259 +       return res;
3260 +}
3261 +
3262 +static struct nf_sockopt_ops so_set = { /* SO_IP_SET get/set hooks */
3263 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
3264 +       .owner          = THIS_MODULE,
3265 +#else
3266 +       .use            = 0,
3267 +#endif
3268 +       .pf             = PF_INET,
3269 +       .get_optmin     = SO_IP_SET,
3270 +       .get_optmax     = SO_IP_SET + 1,
3271 +       .get            = ip_set_sockfn_get,
3272 +       .set_optmin     = SO_IP_SET,
3273 +       .set_optmax     = SO_IP_SET + 1,
3274 +       .set            = ip_set_sockfn_set,
3275 +};
3276 +
3277 +static int hash_size, max_sets; /* 0 = not set; ip_set_init() then keeps its own sizes */
3278 +module_param(hash_size, int, 0600);
3279 +MODULE_PARM_DESC(hash_size, "hash size for bindings");
3280 +module_param(max_sets, int, 0600);
3281 +MODULE_PARM_DESC(max_sets, "maximal number of sets");
3282 +MODULE_DESCRIPTION("module implementing core IP set support");
3283 +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
3284 +MODULE_LICENSE("GPL");
3285 +
3286 +/* Module init: allocate the set table and the bindings hash, then register
3287 + * so_set.  Returns 0, -ENOMEM, or nf_register_sockopt()'s non-zero result. */
3288 +static int __init ip_set_init(void)
3289 +{
3290 +       ip_set_id_t bucket;
3291 +       int err;
3292 +
3293 +       get_random_bytes(&ip_set_hash_random, 4);
3294 +       if (max_sets)           /* non-zero module parameter wins */
3295 +               ip_set_max = max_sets;
3296 +       ip_set_list = vmalloc(sizeof(struct ip_set *) * ip_set_max);
3297 +       if (ip_set_list == NULL) {
3298 +               printk(KERN_ERR "Unable to create ip_set_list\n");
3299 +               return -ENOMEM;
3300 +       }
3301 +       memset(ip_set_list, 0, sizeof(struct ip_set *) * ip_set_max);
3302 +       if (hash_size)          /* non-zero module parameter wins */
3303 +               ip_set_bindings_hash_size = hash_size;
3304 +       ip_set_hash = vmalloc(sizeof(struct list_head) * ip_set_bindings_hash_size);
3305 +       if (ip_set_hash == NULL) {
3306 +               printk(KERN_ERR "Unable to create ip_set_hash\n");
3307 +               vfree(ip_set_list);
3308 +               return -ENOMEM;
3309 +       }
3310 +       for (bucket = 0; bucket < ip_set_bindings_hash_size; bucket++)
3311 +               INIT_LIST_HEAD(ip_set_hash + bucket);
3312 +       INIT_LIST_HEAD(&set_type_list);
3313 +
3314 +       err = nf_register_sockopt(&so_set);
3315 +       if (err == 0)
3316 +               return 0;
3317 +       ip_set_printk("SO_SET registry failed: %d", err);
3318 +       vfree(ip_set_list);
3319 +       vfree(ip_set_hash);
3320 +       return err;
3321 +}
3322 +
3323 +static void __exit ip_set_fini(void)
3324 +{
3325 +       /* No set or binding can be left by now: unhook, then free tables */
3326 +       nf_unregister_sockopt(&so_set);
3327 +       vfree(ip_set_hash);
3328 +       vfree(ip_set_list);
3329 +       DP("these are the famous last words");
3330 +}
3331 +
3332 +EXPORT_SYMBOL(ip_set_register_set_type);       /* symbols made available to other modules */
3333 +EXPORT_SYMBOL(ip_set_unregister_set_type);
3334 +
3335 +EXPORT_SYMBOL(ip_set_get_byname);
3336 +EXPORT_SYMBOL(ip_set_get_byindex);
3337 +EXPORT_SYMBOL(ip_set_put);
3338 +
3339 +EXPORT_SYMBOL(ip_set_addip_kernel);
3340 +EXPORT_SYMBOL(ip_set_delip_kernel);
3341 +EXPORT_SYMBOL(ip_set_testip_kernel);
3342 +
3343 +module_init(ip_set_init);      /* ip_set_init() is the module's init entry point */
3344 +module_exit(ip_set_fini);      /* ip_set_fini() is the module's exit entry point */
3345 diff --git a/net/ipv4/netfilter/ip_set_iphash.c b/net/ipv4/netfilter/ip_set_iphash.c
3346 new file mode 100644
3347 index 0000000..63f7ac9
3348 --- /dev/null
3349 +++ b/net/ipv4/netfilter/ip_set_iphash.c
3350 @@ -0,0 +1,429 @@
3351 +/* Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
3352 + *
3353 + * This program is free software; you can redistribute it and/or modify
3354 + * it under the terms of the GNU General Public License version 2 as
3355 + * published by the Free Software Foundation.  
3356 + */
3357 +
3358 +/* Kernel module implementing an ip hash set */
3359 +
3360 +#include <linux/module.h>
3361 +#include <linux/ip.h>
3362 +#include <linux/skbuff.h>
3363 +#include <linux/version.h>
3364 +#include <linux/jhash.h>
3365 +#include <linux/netfilter_ipv4/ip_tables.h>
3366 +#include <linux/netfilter_ipv4/ip_set.h>
3367 +#include <linux/errno.h>
3368 +#include <asm/uaccess.h>
3369 +#include <asm/bitops.h>
3370 +#include <linux/spinlock.h>
3371 +#include <linux/vmalloc.h>
3372 +#include <linux/random.h>
3373 +
3374 +#include <net/ip.h>
3375 +
3376 +#include <linux/netfilter_ipv4/ip_set_malloc.h>
3377 +#include <linux/netfilter_ipv4/ip_set_iphash.h>
3378 +
3379 +static int limit = MAX_RANGE;  /* element cap checked by __addip; module param */
3380 +/* Hash ip with the i-th 32-bit seed stored in map->initval. */
3381 +static inline __u32
3382 +jhash_ip(const struct ip_set_iphash *map, uint16_t i, ip_set_ip_t ip)
3383 +{
3384 +       return jhash_1word(ip, ((uint32_t *) map->initval)[i]);
3385 +}
3386 +/* Find ip, masked by the set netmask: return its slot index or UINT_MAX. */
3387 +static inline __u32
3388 +hash_id(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
3389 +{
3390 +       struct ip_set_iphash *map = (struct ip_set_iphash *) set->data;
3391 +       __u32 id;
3392 +       u_int16_t i;
3393 +       ip_set_ip_t *elem;
3394 +
3395 +       *hash_ip = ip & map->netmask;
3396 +       DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u, %u.%u.%u.%u",
3397 +          set->name, HIPQUAD(ip), HIPQUAD(*hash_ip), HIPQUAD(map->netmask));
3398 +       if (!*hash_ip)          /* 0 marks an empty slot: never a member */
3399 +               return UINT_MAX;
3400 +       for (i = 0; i < map->probes; i++) {
3401 +               id = jhash_ip(map, i, *hash_ip) % map->hashsize;
3402 +               DP("hash key: %u", id);
3403 +               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id);
3404 +               if (*elem == *hash_ip)
3405 +                       return id;
3406 +               /* No shortcut at testing - there can be deleted entries. */
3407 +       }
3408 +       return UINT_MAX;
3409 +}
3410 +/* Return 1 when the non-zero ip is found in the set, 0 otherwise. */
3411 +static inline int
3412 +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
3413 +{
3414 +       return ip != 0 && hash_id(set, ip, hash_ip) != UINT_MAX;
3415 +}
3416 +/* Userspace test request: check the request size, then test req->ip. */
3417 +static int
3418 +testip(struct ip_set *set, const void *data, size_t size,
3419 +       ip_set_ip_t *hash_ip)
3420 +{
3421 +       const struct ip_set_req_iphash *req = data;
3422 +
3423 +       if (size == sizeof(struct ip_set_req_iphash))
3424 +               return __testip(set, req->ip, hash_ip);
3425 +
3426 +       /* Request of unexpected length */
3427 +       ip_set_printk("data length wrong (want %zu, have %zu)",
3428 +                     sizeof(struct ip_set_req_iphash),
3429 +                     size);
3430 +       return -EINVAL;
3431 +}
3432 +
3433 +static int
3434 +testip_kernel(struct ip_set *set, 
3435 +             const struct sk_buff *skb,
3436 +             ip_set_ip_t *hash_ip,
3437 +             const u_int32_t *flags,
3438 +             unsigned char index)
3439 +{
3440 +       return __testip(set,
3441 +                       ntohl(flags[index] & IPSET_SRC 
3442 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
3443 +                               ? ip_hdr(skb)->saddr 
3444 +                               : ip_hdr(skb)->daddr),
3445 +#else
3446 +                               ? skb->nh.iph->saddr 
3447 +                               : skb->nh.iph->daddr),
3448 +#endif
3449 +                       hash_ip);
3450 +}
3451 +/* Store ip (masked) in map; -EAGAIN means every probed slot is taken. */
3452 +static inline int
3453 +__addip(struct ip_set_iphash *map, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
3454 +{
3455 +       __u32 probe;
3456 +       u_int16_t i;
3457 +       ip_set_ip_t *elem;
3458 +
3459 +       /* Zero marks a free slot, so a zero ip or masked ip is refused */
3460 +       if (!ip || !(ip & map->netmask) || map->elements >= limit)
3461 +               return -ERANGE;
3462 +       *hash_ip = ip & map->netmask;
3463 +
3464 +       for (i = 0; i < map->probes; i++) {
3465 +               probe = jhash_ip(map, i, *hash_ip) % map->hashsize;
3466 +               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, probe);
3467 +               if (*elem == *hash_ip)
3468 +                       return -EEXIST;
3469 +               if (!*elem) {
3470 +                       *elem = *hash_ip;
3471 +                       map->elements++;
3472 +                       return 0;
3473 +               }
3474 +       }
3475 +       /* Trigger rehashing */
3476 +       return -EAGAIN;
3477 +}
3478 +/* Userspace add request: check the request size, then add req->ip. */
3479 +static int
3480 +addip(struct ip_set *set, const void *data, size_t size,
3481 +        ip_set_ip_t *hash_ip)
3482 +{
3483 +       const struct ip_set_req_iphash *req = data;
3484 +
3485 +       if (size == sizeof(struct ip_set_req_iphash))
3486 +               return __addip((struct ip_set_iphash *) set->data,
3487 +                              req->ip, hash_ip);
3488 +
3489 +       ip_set_printk("data length wrong (want %zu, have %zu)",
3490 +                     sizeof(struct ip_set_req_iphash),
3491 +                     size);
3492 +       return -EINVAL;
3493 +}
3494 +
3495 +static int
3496 +addip_kernel(struct ip_set *set, 
3497 +            const struct sk_buff *skb,
3498 +            ip_set_ip_t *hash_ip,
3499 +            const u_int32_t *flags,
3500 +            unsigned char index)
3501 +{
3502 +       return __addip((struct ip_set_iphash *) set->data,
3503 +                      ntohl(flags[index] & IPSET_SRC 
3504 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
3505 +                               ? ip_hdr(skb)->saddr 
3506 +                               : ip_hdr(skb)->daddr),
3507 +#else
3508 +                               ? skb->nh.iph->saddr 
3509 +                               : skb->nh.iph->daddr),
3510 +#endif
3511 +                      hash_ip);
3512 +}
3513 +/* Grow the hash by map->resize percent and re-insert every member. */
3514 +static int retry(struct ip_set *set)
3515 +{
3516 +       struct ip_set_iphash *map = (struct ip_set_iphash *) set->data;
3517 +       ip_set_ip_t hash_ip, *elem;
3518 +       void *members;
3519 +       u_int32_t i, hashsize = map->hashsize;
3520 +       int res;
3521 +       struct ip_set_iphash *tmp;
3522 +
3523 +       if (map->resize == 0)   /* a resize of 0 means: never grow */
3524 +               return -ERANGE;
3525 +
3526 +    again:
3527 +       res = 0;
3528 +
3529 +       /* Calculate new hash size */
3530 +       hashsize += (hashsize * map->resize)/100;
3531 +       if (hashsize == map->hashsize)
3532 +               hashsize++;
3533 +
3534 +       ip_set_printk("rehashing of set %s triggered: "
3535 +                     "hashsize grows from %u to %u",
3536 +                     set->name, map->hashsize, hashsize);
3537 +
3538 +       tmp = kmalloc(sizeof(struct ip_set_iphash) 
3539 +                     + map->probes * sizeof(uint32_t), GFP_ATOMIC);
3540 +       if (!tmp) {
3541 +               DP("out of memory for %zu bytes",
3542 +                  sizeof(struct ip_set_iphash)
3543 +                  + map->probes * sizeof(uint32_t));
3544 +               return -ENOMEM;
3545 +       }
3546 +       tmp->members = harray_malloc(hashsize, sizeof(ip_set_ip_t), GFP_ATOMIC);
3547 +       if (!tmp->members) {
3548 +               DP("out of memory for %zu bytes", hashsize * sizeof(ip_set_ip_t));
3549 +               kfree(tmp);
3550 +               return -ENOMEM;
3551 +       }
3552 +       tmp->hashsize = hashsize;
3553 +       tmp->elements = 0;
3554 +       tmp->probes = map->probes;
3555 +       tmp->resize = map->resize;
3556 +       tmp->netmask = map->netmask;
3557 +       memcpy(tmp->initval, map->initval, map->probes * sizeof(uint32_t));
3558 +
3559 +       write_lock_bh(&set->lock);
3560 +       map = (struct ip_set_iphash *) set->data; /* Play safe */
3561 +       for (i = 0; i < map->hashsize && res == 0; i++) {
3562 +               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, i);
3563 +               if (*elem)
3564 +                       res = __addip(tmp, *elem, &hash_ip);
3565 +       }
3566 +       if (res) {
3567 +               write_unlock_bh(&set->lock);
3568 +               harray_free(tmp->members);
3569 +               kfree(tmp);
3570 +               if (res == -EAGAIN)     /* probes ran out: grow again */
3571 +                       goto again;
3572 +               return res;     /* e.g. -ERANGE: a bigger table cannot help */
3573 +       }
3574 +       /* Success at resizing! */
3575 +       members = map->members;
3576 +
3577 +       map->hashsize = tmp->hashsize;
3578 +       map->members = tmp->members;
3579 +       write_unlock_bh(&set->lock);
3580 +
3581 +       harray_free(members);
3582 +       kfree(tmp);
3583 +
3584 +       return 0;
3585 +}
3586 +/* Delete ip by zeroing its slot; -EEXIST when it is not in the set. */
3587 +static inline int
3588 +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
3589 +{
3590 +       struct ip_set_iphash *map = (struct ip_set_iphash *) set->data;
3591 +       ip_set_ip_t slot, *entry;
3592 +
3593 +       if (ip == 0)
3594 +               return -ERANGE;
3595 +
3596 +       slot = hash_id(set, ip, hash_ip);
3597 +       if (slot == UINT_MAX)
3598 +               return -EEXIST;
3599 +
3600 +       /* A zero value marks the slot as free again */
3601 +       entry = HARRAY_ELEM(map->members, ip_set_ip_t *, slot);
3602 +       *entry = 0;
3603 +       map->elements--;
3604 +       return 0;
3605 +}
3606 +/* Userspace delete request: check the request size, then delete req->ip. */
3607 +static int
3608 +delip(struct ip_set *set, const void *data, size_t size,
3609 +        ip_set_ip_t *hash_ip)
3610 +{
3611 +       const struct ip_set_req_iphash *req = data;
3612 +
3613 +       if (size == sizeof(struct ip_set_req_iphash))
3614 +               return __delip(set, req->ip, hash_ip);
3615 +
3616 +       /* Request of unexpected length */
3617 +       ip_set_printk("data length wrong (want %zu, have %zu)",
3618 +                     sizeof(struct ip_set_req_iphash),
3619 +                     size);
3620 +       return -EINVAL;
3621 +}
3622 +
3623 +static int
3624 +delip_kernel(struct ip_set *set, 
3625 +            const struct sk_buff *skb,
3626 +            ip_set_ip_t *hash_ip,
3627 +            const u_int32_t *flags,
3628 +            unsigned char index)
3629 +{
3630 +       return __delip(set,
3631 +                      ntohl(flags[index] & IPSET_SRC 
3632 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
3633 +                               ? ip_hdr(skb)->saddr 
3634 +                               : ip_hdr(skb)->daddr),
3635 +#else
3636 +                               ? skb->nh.iph->saddr 
3637 +                               : skb->nh.iph->daddr),
3638 +#endif
3639 +                      hash_ip);
3640 +}
3641 +/* Build an iphash set from a userspace create request; 0 or -errno. */
3642 +static int create(struct ip_set *set, const void *data, size_t size)
3643 +{
3644 +       struct ip_set_req_iphash_create *req =
3645 +           (struct ip_set_req_iphash_create *) data;
3646 +       struct ip_set_iphash *map;
3647 +       uint16_t i;
3648 +
3649 +       if (size != sizeof(struct ip_set_req_iphash_create)) {
3650 +               ip_set_printk("data length wrong (want %zu, have %zu)",
3651 +                              sizeof(struct ip_set_req_iphash_create),
3652 +                              size);
3653 +               return -EINVAL;
3654 +       }
3655 +
3656 +       if (req->hashsize < 1) {
3657 +               ip_set_printk("hashsize too small");
3658 +               return -ENOEXEC;
3659 +       }
3660 +
3661 +       if (req->probes < 1) {
3662 +               ip_set_printk("probes too small");
3663 +               return -ENOEXEC;
3664 +       }
3665 +
3666 +       map = kmalloc(sizeof(struct ip_set_iphash) 
3667 +                     + req->probes * sizeof(uint32_t), GFP_KERNEL);
3668 +       if (!map) {
3669 +               DP("out of memory for %zu bytes",
3670 +                  sizeof(struct ip_set_iphash)
3671 +                  + req->probes * sizeof(uint32_t));
3672 +               return -ENOMEM;
3673 +       }
3674 +       for (i = 0; i < req->probes; i++)       /* one random seed per probe */
3675 +               get_random_bytes(((uint32_t *) map->initval)+i, 4);
3676 +       map->elements = 0;
3677 +       map->hashsize = req->hashsize;
3678 +       map->probes = req->probes;
3679 +       map->resize = req->resize;
3680 +       map->netmask = req->netmask;
3681 +       map->members = harray_malloc(map->hashsize, sizeof(ip_set_ip_t), GFP_KERNEL);
3682 +       if (!map->members) {
3683 +               DP("out of memory for %zu bytes", map->hashsize * sizeof(ip_set_ip_t));
3684 +               kfree(map);
3685 +               return -ENOMEM;
3686 +       }
3687 +
3688 +       set->data = map;
3689 +       return 0;
3690 +}
3691 +/* Free the member array and the map, then clear set->data. */
3692 +static void destroy(struct ip_set *set)
3693 +{
3694 +       struct ip_set_iphash *map = (struct ip_set_iphash *) set->data;
3695 +
3696 +       harray_free(map->members);
3697 +       kfree(map);
3698 +
3699 +       set->data = NULL;
3700 +}
3701 +/* Empty the set: flush all hashsize slots and reset the element count. */
3702 +static void flush(struct ip_set *set)
3703 +{
3704 +       struct ip_set_iphash *map = (struct ip_set_iphash *) set->data;
3705 +       harray_flush(map->members, map->hashsize, sizeof(ip_set_ip_t));
3706 +       map->elements = 0;
3707 +}
3708 +/* Write the set parameters into data, laid out as a create request. */
3709 +static void list_header(const struct ip_set *set, void *data)
3710 +{
3711 +       struct ip_set_iphash *map = (struct ip_set_iphash *) set->data;
3712 +       struct ip_set_req_iphash_create *header =
3713 +           (struct ip_set_req_iphash_create *) data;
3714 +
3715 +       header->hashsize = map->hashsize;
3716 +       header->probes = map->probes;
3717 +       header->resize = map->resize;
3718 +       header->netmask = map->netmask;
3719 +}
3720 +/* Bytes written by list_members(): one ip_set_ip_t per hash slot. */
3721 +static int list_members_size(const struct ip_set *set)
3722 +{
3723 +       struct ip_set_iphash *map = (struct ip_set_iphash *) set->data;
3724 +
3725 +       return (map->hashsize * sizeof(ip_set_ip_t));
3726 +}
3727 +/* Copy all hashsize slots, empty (zero) ones included, into data. */
3728 +static void list_members(const struct ip_set *set, void *data)
3729 +{
3730 +       struct ip_set_iphash *map = (struct ip_set_iphash *) set->data;
3731 +       ip_set_ip_t i, *elem, *out = data;
3732 +
3733 +       for (i = 0; i < map->hashsize; i++) {
3734 +               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, i);
3735 +               out[i] = *elem;
3736 +       }
3737 +}
3738 +/* Callbacks and request sizes under which the iphash type is registered. */
3739 +static struct ip_set_type ip_set_iphash = {
3740 +       .typename               = SETTYPE_NAME,
3741 +       .features               = IPSET_TYPE_IP | IPSET_DATA_SINGLE,
3742 +       .protocol_version       = IP_SET_PROTOCOL_VERSION,
3743 +       .create                 = &create,
3744 +       .destroy                = &destroy,
3745 +       .flush                  = &flush,
3746 +       .reqsize                = sizeof(struct ip_set_req_iphash),
3747 +       .addip                  = &addip,
3748 +       .addip_kernel           = &addip_kernel,
3749 +       .retry                  = &retry,
3750 +       .delip                  = &delip,
3751 +       .delip_kernel           = &delip_kernel,
3752 +       .testip                 = &testip,
3753 +       .testip_kernel          = &testip_kernel,
3754 +       .header_size            = sizeof(struct ip_set_req_iphash_create),
3755 +       .list_header            = &list_header,
3756 +       .list_members_size      = &list_members_size,
3757 +       .list_members           = &list_members,
3758 +       .me                     = THIS_MODULE,
3759 +};
3760 +/* Module metadata; limit is exposed as a parameter with mode 0600 */
3761 +MODULE_LICENSE("GPL");
3762 +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
3763 +MODULE_DESCRIPTION("iphash type of IP sets");
3764 +module_param(limit, int, 0600);
3765 +MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
3766 +/* Module init: register the iphash set type; returns the register result. */
3767 +static int __init ip_set_iphash_init(void)
3768 +{
3769 +       return ip_set_register_set_type(&ip_set_iphash);
3770 +}
3771 +/* Module exit: unregister the iphash set type. */
3772 +static void __exit ip_set_iphash_fini(void)
3773 +{
3774 +       /* FIXME: possible race with ip_set_create() */
3775 +       ip_set_unregister_set_type(&ip_set_iphash);
3776 +}
3777 +/* Module entry and exit hooks */
3778 +module_init(ip_set_iphash_init);
3779 +module_exit(ip_set_iphash_fini);
3780 diff --git a/net/ipv4/netfilter/ip_set_ipmap.c b/net/ipv4/netfilter/ip_set_ipmap.c
3781 new file mode 100644
3782 index 0000000..0fe8d3c
3783 --- /dev/null
3784 +++ b/net/ipv4/netfilter/ip_set_ipmap.c
3785 @@ -0,0 +1,336 @@
3786 +/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
3787 + *                         Patrick Schaaf <bof@bof.de>
3788 + * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
3789 + *
3790 + * This program is free software; you can redistribute it and/or modify
3791 + * it under the terms of the GNU General Public License version 2 as
3792 + * published by the Free Software Foundation.  
3793 + */
3794 +
3795 +/* Kernel module implementing an IP set type: the single bitmap type */
3796 +
3797 +#include <linux/module.h>
3798 +#include <linux/ip.h>
3799 +#include <linux/skbuff.h>
3800 +#include <linux/version.h>
3801 +#include <linux/netfilter_ipv4/ip_tables.h>
3802 +#include <linux/netfilter_ipv4/ip_set.h>
3803 +#include <linux/errno.h>
3804 +#include <asm/uaccess.h>
3805 +#include <asm/bitops.h>
3806 +#include <linux/spinlock.h>
3807 +
3808 +#include <linux/netfilter_ipv4/ip_set_ipmap.h>
3809 +/* Bit index of ip: its offset from first_ip divided by map->hosts. */
3810 +static inline ip_set_ip_t
3811 +ip_to_id(const struct ip_set_ipmap *map, ip_set_ip_t ip)
3812 +{
3813 +       return (ip - map->first_ip)/map->hosts;
3814 +}
3815 +/* Test the bit of ip; -ERANGE when ip is outside first_ip..last_ip. */
3816 +static inline int
3817 +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
3818 +{
3819 +       struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data;
3820 +
3821 +       if (!(ip >= map->first_ip && ip <= map->last_ip))
3822 +               return -ERANGE;
3823 +
3824 +       *hash_ip = ip & map->netmask;
3825 +       DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u",
3826 +          set->name, HIPQUAD(ip), HIPQUAD(*hash_ip));
3827 +       return test_bit(ip_to_id(map, *hash_ip), map->members) ? 1 : 0;
3828 +}
3829 +/* Userspace test request: check the request size, then test req->ip. */
3830 +static int
3831 +testip(struct ip_set *set, const void *data, size_t size,
3832 +       ip_set_ip_t *hash_ip)
3833 +{
3834 +       const struct ip_set_req_ipmap *req = data;
3835 +
3836 +       if (size == sizeof(struct ip_set_req_ipmap))
3837 +               return __testip(set, req->ip, hash_ip);
3838 +
3839 +       /* Request of unexpected length */
3840 +       ip_set_printk("data length wrong (want %zu, have %zu)",
3841 +                     sizeof(struct ip_set_req_ipmap),
3842 +                     size);
3843 +       return -EINVAL;
3844 +}
3845 +
3846 +static int
3847 +testip_kernel(struct ip_set *set, 
3848 +             const struct sk_buff *skb,
3849 +             ip_set_ip_t *hash_ip,
3850 +             const u_int32_t *flags,
3851 +             unsigned char index)
3852 +{
3853 +       int res =  __testip(set,
3854 +                       ntohl(flags[index] & IPSET_SRC
3855 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
3856 +                               ? ip_hdr(skb)->saddr 
3857 +                               : ip_hdr(skb)->daddr),
3858 +#else
3859 +                               ? skb->nh.iph->saddr 
3860 +                               : skb->nh.iph->daddr),
3861 +#endif
3862 +                       hash_ip);
3863 +       return (res < 0 ? 0 : res);
3864 +}
3865 +/* Set the bit of ip; -ERANGE when out of range, -EEXIST when already set. */
3866 +static inline int
3867 +__addip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
3868 +{
3869 +       struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data;
3870 +
3871 +       if (!(ip >= map->first_ip && ip <= map->last_ip))
3872 +               return -ERANGE;
3873 +
3874 +       *hash_ip = ip & map->netmask;
3875 +       DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip));
3876 +
3877 +       /* A bit that was already set means ip is a member already */
3878 +       return test_and_set_bit(ip_to_id(map, *hash_ip), map->members)
3879 +               ? -EEXIST : 0;
3880 +}
3881 +/* Userspace add request: check the request size, then add req->ip. */
3882 +static int
3883 +addip(struct ip_set *set, const void *data, size_t size,
3884 +      ip_set_ip_t *hash_ip)
3885 +{
3886 +       const struct ip_set_req_ipmap *req = data;
3887 +
3888 +       if (size == sizeof(struct ip_set_req_ipmap)) {
3889 +               DP("%u.%u.%u.%u", HIPQUAD(req->ip));
3890 +               return __addip(set, req->ip, hash_ip);
3891 +       }
3892 +
3893 +       ip_set_printk("data length wrong (want %zu, have %zu)",
3894 +                     sizeof(struct ip_set_req_ipmap),
3895 +                     size);
3896 +       return -EINVAL;
3897 +}
3898 +
3899 +static int
3900 +addip_kernel(struct ip_set *set, 
3901 +            const struct sk_buff *skb,
3902 +            ip_set_ip_t *hash_ip,
3903 +            const u_int32_t *flags,
3904 +            unsigned char index)
3905 +{
3906 +       return __addip(set,
3907 +                      ntohl(flags[index] & IPSET_SRC 
3908 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
3909 +                               ? ip_hdr(skb)->saddr 
3910 +                               : ip_hdr(skb)->daddr),
3911 +#else
3912 +                               ? skb->nh.iph->saddr 
3913 +                               : skb->nh.iph->daddr),
3914 +#endif
3915 +                      hash_ip);
3916 +}
3917 +/* Clear the bit of ip; -ERANGE when out of range, -EEXIST when not set. */
3918 +static inline int
3919 +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
3920 +{
3921 +       struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data;
3922 +
3923 +       if (!(ip >= map->first_ip && ip <= map->last_ip))
3924 +               return -ERANGE;
3925 +
3926 +       *hash_ip = ip & map->netmask;
3927 +       DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip));
3928 +
3929 +       /* A bit that was already clear means ip was not a member */
3930 +       return test_and_clear_bit(ip_to_id(map, *hash_ip), map->members)
3931 +               ? 0 : -EEXIST;
3932 +}
3933 +/* Userspace delete request: check the request size, then delete req->ip. */
3934 +static int
3935 +delip(struct ip_set *set, const void *data, size_t size,
3936 +      ip_set_ip_t *hash_ip)
3937 +{
3938 +       const struct ip_set_req_ipmap *req = data;
3939 +
3940 +       if (size == sizeof(struct ip_set_req_ipmap))
3941 +               return __delip(set, req->ip, hash_ip);
3942 +
3943 +       /* Request of unexpected length */
3944 +       ip_set_printk("data length wrong (want %zu, have %zu)",
3945 +                     sizeof(struct ip_set_req_ipmap),
3946 +                     size);
3947 +       return -EINVAL;
3948 +}
3949 +
3950 +static int
3951 +delip_kernel(struct ip_set *set,
3952 +            const struct sk_buff *skb,
3953 +            ip_set_ip_t *hash_ip,
3954 +            const u_int32_t *flags,
3955 +            unsigned char index)
3956 +{
3957 +       return __delip(set,
3958 +                      ntohl(flags[index] & IPSET_SRC 
3959 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
3960 +                               ? ip_hdr(skb)->saddr 
3961 +                               : ip_hdr(skb)->daddr),
3962 +#else
3963 +                               ? skb->nh.iph->saddr 
3964 +                               : skb->nh.iph->daddr),
3965 +#endif
3966 +                      hash_ip);
3967 +}
3968 +/* Build an ipmap (bitmap) set from a userspace create request; 0 or -errno. */
3969 +static int create(struct ip_set *set, const void *data, size_t size)
3970 +{
3971 +       int newbytes;
3972 +       struct ip_set_req_ipmap_create *req =
3973 +           (struct ip_set_req_ipmap_create *) data;
3974 +       struct ip_set_ipmap *map;
3975 +
3976 +       if (size != sizeof(struct ip_set_req_ipmap_create)) {
3977 +               ip_set_printk("data length wrong (want %zu, have %zu)",
3978 +                             sizeof(struct ip_set_req_ipmap_create),
3979 +                             size);
3980 +               return -EINVAL;
3981 +       }
3982 +
3983 +       DP("from %u.%u.%u.%u to %u.%u.%u.%u",
3984 +          HIPQUAD(req->from), HIPQUAD(req->to));
3985 +
3986 +       if (req->from > req->to) {
3987 +               DP("bad ip range");
3988 +               return -ENOEXEC;
3989 +       }
3990 +
3991 +       map = kmalloc(sizeof(struct ip_set_ipmap), GFP_KERNEL);
3992 +       if (!map) {
3993 +               DP("out of memory for %zu bytes",
3994 +                  sizeof(struct ip_set_ipmap));
3995 +               return -ENOMEM;
3996 +       }
3997 +       map->first_ip = req->from;
3998 +       map->last_ip = req->to;
3999 +       map->netmask = req->netmask;
4000 +
4001 +       if (req->netmask == 0xFFFFFFFF) {
4002 +               map->hosts = 1;
4003 +               map->sizeid = map->last_ip - map->first_ip + 1;
4004 +       } else {
4005 +               unsigned int mask_bits, netmask_bits;
4006 +               ip_set_ip_t mask;
4007 +
4008 +               map->first_ip &= map->netmask;  /* Should we better bark? */
4009 +
4010 +               mask = range_to_mask(map->first_ip, map->last_ip, &mask_bits);
4011 +               netmask_bits = mask_to_bits(map->netmask);
4012 +
4013 +               if ((!mask && (map->first_ip || map->last_ip != 0xFFFFFFFF))
4014 +                   || netmask_bits <= mask_bits) {
4015 +                       kfree(map);     /* do not leak the descriptor */
4016 +                       return -ENOEXEC;
4017 +               }
4018 +               DP("mask_bits %u, netmask_bits %u", mask_bits, netmask_bits);
4019 +               map->hosts = 2 << (32 - netmask_bits - 1);
4020 +               map->sizeid = 2 << (netmask_bits - mask_bits - 1);
4021 +       }
4022 +       /* sizeid wraps to 0 when the range spans the whole 32-bit space */
4023 +       if (!map->sizeid || map->sizeid > MAX_RANGE + 1) {
4024 +               ip_set_printk("range too big (max %d addresses)", MAX_RANGE+1);
4025 +               kfree(map);
4026 +               return -ENOEXEC;
4027 +       }
4028 +       DP("hosts %u, sizeid %u", map->hosts, map->sizeid);
4029 +       newbytes = bitmap_bytes(0, map->sizeid - 1);
4030 +       map->members = kmalloc(newbytes, GFP_KERNEL);
4031 +       if (!map->members) {
4032 +               DP("out of memory for %d bytes", newbytes);
4033 +               kfree(map);
4034 +               return -ENOMEM;
4035 +       }
4036 +       memset(map->members, 0, newbytes);
4037 +
4038 +       set->data = map;
4039 +       return 0;
4040 +}
4041 +/* Free the bitmap and the map, then clear set->data. */
4042 +static void destroy(struct ip_set *set)
4043 +{
4044 +       struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data;
4045 +       
4046 +       kfree(map->members);
4047 +       kfree(map);
4048 +       
4049 +       set->data = NULL;
4050 +}
4051 +/* Empty the set by zeroing the whole bitmap. */
4052 +static void flush(struct ip_set *set)
4053 +{
4054 +       struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data;
4055 +       memset(map->members, 0, bitmap_bytes(0, map->sizeid - 1));
4056 +}
4057 +/* Write the set parameters into data, laid out as a create request. */
4058 +static void list_header(const struct ip_set *set, void *data)
4059 +{
4060 +       struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data;
4061 +       struct ip_set_req_ipmap_create *header =
4062 +           (struct ip_set_req_ipmap_create *) data;
4063 +
4064 +       header->from = map->first_ip;
4065 +       header->to = map->last_ip;
4066 +       header->netmask = map->netmask;
4067 +}
4068 +/* Bytes copied by list_members(): the size of the bitmap. */
4069 +static int list_members_size(const struct ip_set *set)
4070 +{
4071 +       struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data;
4072 +
4073 +       return bitmap_bytes(0, map->sizeid - 1);
4074 +}
4075 +/* Copy the raw bitmap into data. */
4076 +static void list_members(const struct ip_set *set, void *data)
4077 +{
4078 +       struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data;
4079 +       int bytes = bitmap_bytes(0, map->sizeid - 1);
4080 +
4081 +       memcpy(data, map->members, bytes);
4082 +}
4083 +/* Callbacks and request sizes under which the ipmap type is registered. */
4084 +static struct ip_set_type ip_set_ipmap = {
4085 +       .typename               = SETTYPE_NAME,
4086 +       .features               = IPSET_TYPE_IP | IPSET_DATA_SINGLE,
4087 +       .protocol_version       = IP_SET_PROTOCOL_VERSION,
4088 +       .create                 = &create,
4089 +       .destroy                = &destroy,
4090 +       .flush                  = &flush,
4091 +       .reqsize                = sizeof(struct ip_set_req_ipmap),
4092 +       .addip                  = &addip,
4093 +       .addip_kernel           = &addip_kernel,
4094 +       .delip                  = &delip,
4095 +       .delip_kernel           = &delip_kernel,
4096 +       .testip                 = &testip,
4097 +       .testip_kernel          = &testip_kernel,
4098 +       .header_size            = sizeof(struct ip_set_req_ipmap_create),
4099 +       .list_header            = &list_header,
4100 +       .list_members_size      = &list_members_size,
4101 +       .list_members           = &list_members,
4102 +       .me                     = THIS_MODULE,
4103 +};
4104 +/* Module metadata */
4105 +MODULE_LICENSE("GPL");
4106 +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
4107 +MODULE_DESCRIPTION("ipmap type of IP sets");
4108 +/* Module init: register the ipmap set type; returns the register result. */
4109 +static int __init ip_set_ipmap_init(void)
4110 +{
4111 +       return ip_set_register_set_type(&ip_set_ipmap);
4112 +}
4113 +/* Module exit: unregister the ipmap set type. */
4114 +static void __exit ip_set_ipmap_fini(void)
4115 +{
4116 +       /* FIXME: possible race with ip_set_create() */
4117 +       ip_set_unregister_set_type(&ip_set_ipmap);
4118 +}
4119 +/* Module entry and exit hooks */
4120 +module_init(ip_set_ipmap_init);
4121 +module_exit(ip_set_ipmap_fini);
4122 diff --git a/net/ipv4/netfilter/ip_set_ipporthash.c b/net/ipv4/netfilter/ip_set_ipporthash.c
4123 new file mode 100644
4124 index 0000000..0d562b8
4125 --- /dev/null
4126 +++ b/net/ipv4/netfilter/ip_set_ipporthash.c
4127 @@ -0,0 +1,581 @@
4128 +/* Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4129 + *
4130 + * This program is free software; you can redistribute it and/or modify
4131 + * it under the terms of the GNU General Public License version 2 as
4132 + * published by the Free Software Foundation.  
4133 + */
4134 +
4135 +/* Kernel module implementing an ip+port hash set */
4136 +
4137 +#include <linux/module.h>
4138 +#include <linux/ip.h>
4139 +#include <linux/tcp.h>
4140 +#include <linux/udp.h>
4141 +#include <linux/skbuff.h>
4142 +#include <linux/version.h>
4143 +#include <linux/jhash.h>
4144 +#include <linux/netfilter_ipv4/ip_tables.h>
4145 +#include <linux/netfilter_ipv4/ip_set.h>
4146 +#include <linux/errno.h>
4147 +#include <asm/uaccess.h>
4148 +#include <asm/bitops.h>
4149 +#include <linux/spinlock.h>
4150 +#include <linux/vmalloc.h>
4151 +#include <linux/random.h>
4152 +
4153 +#include <net/ip.h>
4154 +
4155 +#include <linux/netfilter_ipv4/ip_set_malloc.h>
4156 +#include <linux/netfilter_ipv4/ip_set_ipporthash.h>
4157 +
4158 +static int limit = MAX_RANGE;  /* presumably the per-set element cap, as in iphash */
4159 +/* Return the TCP/UDP port of skb selected by flags, else INVALID_PORT. */
4160 +/* We must handle non-linear skbs */
4161 +static inline ip_set_ip_t
4162 +get_port(const struct sk_buff *skb, u_int32_t flags)
4163 +{
4164 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4165 +       struct iphdr *iph = ip_hdr(skb);
4166 +#else
4167 +       struct iphdr *iph = skb->nh.iph;
4168 +#endif
4169 +       u_int16_t offset = ntohs(iph->frag_off) & IP_OFFSET;
4170 +
4171 +       switch (iph->protocol) {
4172 +       case IPPROTO_TCP: {
4173 +               struct tcphdr tcph;
4174 +               
4175 +               /* See comments at tcp_match in ip_tables.c */
4176 +               if (offset)
4177 +                       return INVALID_PORT;
4178 +
4179 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4180 +               if (skb_copy_bits(skb, ip_hdr(skb)->ihl*4, &tcph, sizeof(tcph)) < 0)
4181 +#else
4182 +               if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0)
4183 +#endif
4184 +                       /* No choice either */
4185 +                       return INVALID_PORT;
4186 +               
4187 +               return ntohs(flags & IPSET_SRC ?
4188 +                            tcph.source : tcph.dest);
4189 +           }
4190 +       case IPPROTO_UDP: {
4191 +               struct udphdr udph;
4192 +
4193 +               if (offset)     /* non-zero fragment offset: no port available */
4194 +                       return INVALID_PORT;
4195 +
4196 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4197 +               if (skb_copy_bits(skb, ip_hdr(skb)->ihl*4, &udph, sizeof(udph)) < 0)
4198 +#else
4199 +               if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0)
4200 +#endif
4201 +                       /* No choice either */
4202 +                       return INVALID_PORT;
4203 +               
4204 +               return ntohs(flags & IPSET_SRC ?
4205 +                            udph.source : udph.dest);
4206 +           }
4207 +       default:        /* only TCP and UDP carry a port here */
4208 +               return INVALID_PORT;
4209 +       }
4210 +}
4211 +/* jhash_ip - hash @ip with the i-th 32-bit seed stored in map->initval. */
4212 +static inline __u32
4213 +jhash_ip(const struct ip_set_ipporthash *map, uint16_t i, ip_set_ip_t ip)
4214 +{
4215 +       return jhash_1word(ip, ((uint32_t *) map->initval)[i]);
4216 +}
4217 +/* Pack (ip, port) into one word: (ip - map->first_ip) << 16, plus port.  Arguments are parenthesized so expressions expand safely. */
4218 +#define HASH_IP(map, ip, port) ((port) + (((ip) - ((map)->first_ip)) << 16))
4219 +/* hash_id - probe for (ip, port); stores the packed key in *hash_ip, returns its slot index or UINT_MAX if absent. */
4220 +static inline __u32
4221 +hash_id(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t port,
4222 +       ip_set_ip_t *hash_ip)
4223 +{
4224 +       struct ip_set_ipporthash *map = (struct ip_set_ipporthash *) set->data;
4225 +       ip_set_ip_t *elem;
4226 +       __u32 id;
4227 +       u_int16_t i;
4228 +
4229 +       *hash_ip = HASH_IP(map, ip, port);
4230 +       DP("set: %s, ipport:%u.%u.%u.%u:%u, %u.%u.%u.%u",
4231 +          set->name, HIPQUAD(ip), port, HIPQUAD(*hash_ip));
4232 +       /* Zero marks an empty or deleted slot, so a zero key is never a member */
4233 +       if (!*hash_ip)
4234 +               return UINT_MAX;
4235 +       for (i = 0; i < map->probes; i++) {
4236 +               id = jhash_ip(map, i, *hash_ip) % map->hashsize;
4237 +               DP("hash key: %u", id);
4238 +               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id);
4239 +               if (*elem == *hash_ip)
4240 +                       return id;
4241 +               /* No shortcut at testing - there can be deleted entries. */
4242 +       }
4243 +       return UINT_MAX;
4244 +}
4245 +
4246 +static inline int
4247 +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t port,
4248 +        ip_set_ip_t *hash_ip)
4249 +{
4250 +       struct ip_set_ipporthash *map = (struct ip_set_ipporthash *) set->data;
4251 +       /* Addresses outside [first_ip, last_ip] are rejected with -ERANGE */
4252 +       if (ip > map->last_ip || ip < map->first_ip)
4253 +               return -ERANGE;
4254 +       /* 1 if some probe slot holds (ip, port), 0 otherwise */
4255 +       return hash_id(set, ip, port, hash_ip) == UINT_MAX ? 0 : 1;
4256 +}
4257 +
4258 +static int
4259 +testip(struct ip_set *set, const void *data, size_t size,
4260 +       ip_set_ip_t *hash_ip)
4261 +{
4262 +       /* Request payload; used only if its length matches the struct exactly */
4263 +       const struct ip_set_req_ipporthash *req = data;
4264 +
4265 +       if (size == sizeof(*req))
4266 +               return __testip(set, req->ip, req->port, hash_ip);
4267 +
4268 +       ip_set_printk("data length wrong (want %zu, have %zu)",
4269 +                     sizeof(*req),
4270 +                     size);
4271 +       return -EINVAL;
4272 +}
4273 +
4274 +static int
4275 +testip_kernel(struct ip_set *set, 
4276 +             const struct sk_buff *skb,
4277 +             ip_set_ip_t *hash_ip,
4278 +             const u_int32_t *flags,
4279 +             unsigned char index)
4280 +{
4281 +       ip_set_ip_t port;
4282 +       int res;
4283 +
4284 +       if (flags[index+1] == 0)
4285 +               return 0;
4286 +               
4287 +       port = get_port(skb, flags[index+1]);
4288 +
4289 +       DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
4290 +          flags[index] & IPSET_SRC ? "SRC" : "DST",
4291 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4292 +          NIPQUAD(ip_hdr(skb)->saddr),
4293 +          NIPQUAD(ip_hdr(skb)->daddr));
4294 +#else
4295 +          NIPQUAD(skb->nh.iph->saddr),
4296 +          NIPQUAD(skb->nh.iph->daddr));
4297 +#endif
4298 +       DP("flag %s port %u",
4299 +          flags[index+1] & IPSET_SRC ? "SRC" : "DST", 
4300 +          port);       
4301 +       if (port == INVALID_PORT)
4302 +               return 0;       
4303 +
4304 +       res =  __testip(set,
4305 +                       ntohl(flags[index] & IPSET_SRC 
4306 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4307 +                                       ? ip_hdr(skb)->saddr 
4308 +                                       : ip_hdr(skb)->daddr),
4309 +#else
4310 +                                       ? skb->nh.iph->saddr 
4311 +                                       : skb->nh.iph->daddr),
4312 +#endif
4313 +                       port,
4314 +                       hash_ip);
4315 +       return (res < 0 ? 0 : res);
4316 +       
4317 +}
4318 +/* __add_haship - store hash_ip: 0 on success, -EEXIST if already stored, -EAGAIN if every probe slot is taken. */
4319 +static inline int
4320 +__add_haship(struct ip_set_ipporthash *map, ip_set_ip_t hash_ip)
4321 +{
4322 +       ip_set_ip_t *elem, *slot = NULL;
4323 +       __u32 probe;
4324 +       u_int16_t i;
4325 +       /* Deletions leave holes: check every probe for a duplicate before using the first free slot */
4326 +       for (i = 0; i < map->probes; i++) {
4327 +               probe = jhash_ip(map, i, hash_ip) % map->hashsize;
4328 +               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, probe);
4329 +               if (*elem == hash_ip)
4330 +                       return -EEXIST;
4331 +               if (!slot && !*elem)
4332 +                       slot = elem;    /* first free slot seen so far */
4333 +       }
4334 +       if (!slot)
4335 +               return -EAGAIN;         /* Trigger rehashing */
4336 +       *slot = hash_ip;
4337 +       map->elements++;
4338 +       return 0;
4339 +}
4340 +/* __addip - add (ip, port): returns __add_haship()'s result, or -ERANGE when the request is refused here. */
4341 +static inline int
4342 +__addip(struct ip_set_ipporthash *map, ip_set_ip_t ip, ip_set_ip_t port,
4343 +       ip_set_ip_t *hash_ip)
4344 +{
4345 +       if (map->elements >= limit)     /* limit is the maximal element count */
4346 +               return -ERANGE;
4347 +       if (ip < map->first_ip || ip > map->last_ip)
4348 +               return -ERANGE;
4349 +       *hash_ip = HASH_IP(map, ip, port);
4350 +       if (!*hash_ip)                  /* 0 is the empty-slot marker: not storable */
4351 +               return -ERANGE;
4352 +       return __add_haship(map, *hash_ip);
4353 +}
4354 +
4355 +static int
4356 +addip(struct ip_set *set, const void *data, size_t size,
4357 +        ip_set_ip_t *hash_ip)
4358 +{
4359 +       /* Request payload; used only if its length matches the struct exactly */
4360 +       const struct ip_set_req_ipporthash *req = data;
4361 +
4362 +       if (size == sizeof(*req)) {
4363 +               return __addip((struct ip_set_ipporthash *) set->data,
4364 +                              req->ip, req->port, hash_ip);
4365 +       }
4366 +       ip_set_printk("data length wrong (want %zu, have %zu)",
4367 +                     sizeof(*req),
4368 +                     size);
4369 +       return -EINVAL;
4370 +}
4371 +
4372 +static int
4373 +addip_kernel(struct ip_set *set, 
4374 +            const struct sk_buff *skb,
4375 +            ip_set_ip_t *hash_ip,
4376 +            const u_int32_t *flags,
4377 +            unsigned char index)
4378 +{
4379 +       ip_set_ip_t port;
4380 +
4381 +       if (flags[index+1] == 0)
4382 +               return -EINVAL;
4383 +               
4384 +       port = get_port(skb, flags[index+1]);
4385 +
4386 +       DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
4387 +          flags[index] & IPSET_SRC ? "SRC" : "DST",
4388 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4389 +          NIPQUAD(ip_hdr(skb)->saddr),
4390 +          NIPQUAD(ip_hdr(skb)->daddr));
4391 +#else
4392 +          NIPQUAD(skb->nh.iph->saddr),
4393 +          NIPQUAD(skb->nh.iph->daddr));
4394 +#endif
4395 +       DP("flag %s port %u", 
4396 +          flags[index+1] & IPSET_SRC ? "SRC" : "DST", 
4397 +          port);       
4398 +       if (port == INVALID_PORT)
4399 +               return -EINVAL; 
4400 +
4401 +       return __addip((struct ip_set_ipporthash *) set->data,
4402 +                      ntohl(flags[index] & IPSET_SRC 
4403 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4404 +                               ? ip_hdr(skb)->saddr 
4405 +                               : ip_hdr(skb)->daddr),
4406 +#else
4407 +                               ? skb->nh.iph->saddr 
4408 +                               : skb->nh.iph->daddr),
4409 +#endif
4410 +                      port,
4411 +                      hash_ip);
4412 +}
4413 +/*
4414 + * retry - grow the hash array and re-insert every member into it.
4415 + * Returns 0 on success, -ERANGE when map->resize is 0 and -ENOMEM when an
4416 + * allocation fails; a failed re-insertion restarts with a larger size.
4417 + */
4418 +static int retry(struct ip_set *set)
4419 +{
4420 +       struct ip_set_ipporthash *map = (struct ip_set_ipporthash *) set->data;
4421 +       ip_set_ip_t *elem;
4422 +       void *members;
4423 +       u_int32_t i, hashsize = map->hashsize;
4424 +       int res;
4425 +       struct ip_set_ipporthash *tmp;
4426 +
4427 +       if (map->resize == 0)
4428 +               return -ERANGE;
4429 +
4430 +    again:
4431 +       res = 0;
4432 +       /* Calculate new hash size: grow by resize percent, at least by one */
4433 +       hashsize += (hashsize * map->resize)/100;
4434 +       if (hashsize == map->hashsize)
4435 +               hashsize++;
4436 +       ip_set_printk("rehashing of set %s triggered: "
4437 +                     "hashsize grows from %u to %u",
4438 +                     set->name, map->hashsize, hashsize);
4439 +
4440 +       tmp = kmalloc(sizeof(struct ip_set_ipporthash)
4441 +                     + map->probes * sizeof(uint32_t), GFP_ATOMIC);
4442 +       if (!tmp) {
4443 +               DP("out of memory for %zu bytes",
4444 +                  sizeof(struct ip_set_ipporthash)
4445 +                  + map->probes * sizeof(uint32_t));
4446 +               return -ENOMEM;
4447 +       }
4448 +       tmp->members = harray_malloc(hashsize, sizeof(ip_set_ip_t), GFP_ATOMIC);
4449 +       if (!tmp->members) {
4450 +               DP("out of memory for %zu bytes", hashsize * sizeof(ip_set_ip_t));
4451 +               kfree(tmp);
4452 +               return -ENOMEM;
4453 +       }
4454 +       tmp->hashsize = hashsize;
4455 +       tmp->elements = 0;
4456 +       tmp->probes = map->probes;
4457 +       tmp->resize = map->resize;
4458 +       tmp->first_ip = map->first_ip;
4459 +       tmp->last_ip = map->last_ip;
4460 +       memcpy(tmp->initval, map->initval, map->probes * sizeof(uint32_t));
4461 +
4462 +       write_lock_bh(&set->lock);
4463 +       map = (struct ip_set_ipporthash *) set->data; /* Play safe */
4464 +       for (i = 0; i < map->hashsize && res == 0; i++) {
4465 +               elem = HARRAY_ELEM(map->members, ip_set_ip_t *, i);
4466 +               if (*elem)
4467 +                       res = __add_haship(tmp, *elem);
4468 +       }
4469 +       if (res) {
4470 +               /* Failure, try again */
4471 +               write_unlock_bh(&set->lock);
4472 +               harray_free(tmp->members);
4473 +               kfree(tmp);
4474 +               goto again;
4475 +       }
4476 +
4477 +       /* Success at resizing! Swap in the new array, free the old one unlocked */
4478 +       members = map->members;
4479 +       map->hashsize = tmp->hashsize;
4480 +       map->members = tmp->members;
4481 +       write_unlock_bh(&set->lock);
4482 +
4483 +       harray_free(members);
4484 +       kfree(tmp);
4485 +       return 0;
4486 +}
4487 +/* __delip - remove (ip, port): 0 on success, -ERANGE if ip is out of range, -EEXIST if it is not stored. */
4488 +static inline int
4489 +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t port,
4490 +       ip_set_ip_t *hash_ip)
4491 +{
4492 +       struct ip_set_ipporthash *map = (struct ip_set_ipporthash *) set->data;
4493 +       ip_set_ip_t *elem;
4494 +       ip_set_ip_t slot;
4495 +
4496 +       if (ip > map->last_ip || ip < map->first_ip)
4497 +               return -ERANGE;
4498 +
4499 +       slot = hash_id(set, ip, port, hash_ip);
4500 +       if (slot == UINT_MAX)
4501 +               return -EEXIST;
4502 +
4503 +       /* Writing zero turns the slot back into a free one */
4504 +       elem = HARRAY_ELEM(map->members, ip_set_ip_t *, slot);
4505 +       *elem = 0;
4506 +       map->elements--;
4507 +
4508 +       return 0;
4509 +}
4510 +
4511 +static int
4512 +delip(struct ip_set *set, const void *data, size_t size,
4513 +        ip_set_ip_t *hash_ip)
4514 +{
4515 +       /* Request payload; used only if its length matches the struct exactly */
4516 +       const struct ip_set_req_ipporthash *req = data;
4517 +
4518 +       if (size == sizeof(*req))
4519 +               return __delip(set, req->ip, req->port, hash_ip);
4520 +
4521 +       ip_set_printk("data length wrong (want %zu, have %zu)",
4522 +                     sizeof(*req),
4523 +                     size);
4524 +       return -EINVAL;
4525 +}
4526 +
4527 +static int
4528 +delip_kernel(struct ip_set *set, 
4529 +            const struct sk_buff *skb,
4530 +            ip_set_ip_t *hash_ip,
4531 +            const u_int32_t *flags,
4532 +            unsigned char index)
4533 +{
4534 +       ip_set_ip_t port;
4535 +
4536 +       if (flags[index+1] == 0)
4537 +               return -EINVAL;
4538 +               
4539 +       port = get_port(skb, flags[index+1]);
4540 +
4541 +       DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
4542 +          flags[index] & IPSET_SRC ? "SRC" : "DST",
4543 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4544 +          NIPQUAD(ip_hdr(skb)->saddr),
4545 +          NIPQUAD(ip_hdr(skb)->daddr));
4546 +#else
4547 +          NIPQUAD(skb->nh.iph->saddr),
4548 +          NIPQUAD(skb->nh.iph->daddr));
4549 +#endif
4550 +       DP("flag %s port %u",
4551 +          flags[index+1] & IPSET_SRC ? "SRC" : "DST", 
4552 +          port);       
4553 +       if (port == INVALID_PORT)
4554 +               return -EINVAL; 
4555 +
4556 +       return __delip(set,
4557 +                      ntohl(flags[index] & IPSET_SRC 
4558 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4559 +                               ? ip_hdr(skb)->saddr 
4560 +                               : ip_hdr(skb)->daddr),
4561 +#else
4562 +                               ? skb->nh.iph->saddr 
4563 +                               : skb->nh.iph->daddr),
4564 +#endif
4565 +                      port,
4566 +                      hash_ip);
4567 +}
4568 +/*
4569 + * create - validate a create request and attach a new, empty map to @set.
4570 + * Returns 0, -EINVAL (wrong size), -ENOEXEC (hashsize/probes < 1), -ENOMEM.
4571 + */
4572 +static int create(struct ip_set *set, const void *data, size_t size)
4573 +{
4574 +       struct ip_set_req_ipporthash_create *req =
4575 +           (struct ip_set_req_ipporthash_create *) data;
4576 +       struct ip_set_ipporthash *map;
4577 +       uint16_t i;
4578 +
4579 +       if (size != sizeof(struct ip_set_req_ipporthash_create)) {
4580 +               ip_set_printk("data length wrong (want %zu, have %zu)",
4581 +                              sizeof(struct ip_set_req_ipporthash_create),
4582 +                              size);
4583 +               return -EINVAL;
4584 +       }
4585 +       if (req->hashsize < 1) {
4586 +               ip_set_printk("hashsize too small");
4587 +               return -ENOEXEC;
4588 +       }
4589 +       if (req->probes < 1) {
4590 +               ip_set_printk("probes too small");
4591 +               return -ENOEXEC;
4592 +       }
4593 +
4594 +       map = kmalloc(sizeof(struct ip_set_ipporthash)
4595 +                     + req->probes * sizeof(uint32_t), GFP_KERNEL);
4596 +       if (!map) {
4597 +               DP("out of memory for %zu bytes",
4598 +                  sizeof(struct ip_set_ipporthash)
4599 +                  + req->probes * sizeof(uint32_t));
4600 +               return -ENOMEM;
4601 +       }
4602 +       for (i = 0; i < req->probes; i++)       /* one random seed per probe */
4603 +               get_random_bytes(((uint32_t *) map->initval)+i, 4);
4604 +       map->elements = 0;
4605 +       map->hashsize = req->hashsize;
4606 +       map->probes = req->probes;
4607 +       map->resize = req->resize;
4608 +       map->first_ip = req->from;
4609 +       map->last_ip = req->to;
4610 +       map->members = harray_malloc(map->hashsize, sizeof(ip_set_ip_t), GFP_KERNEL);
4611 +       if (!map->members) {
4612 +               DP("out of memory for %zu bytes", map->hashsize * sizeof(ip_set_ip_t));
4613 +               kfree(map);
4614 +               return -ENOMEM;
4615 +       }
4616 +       set->data = map;
4617 +       return 0;
4618 +}
4619 +
4620 +static void destroy(struct ip_set *set)
4621 +{
4622 +       struct ip_set_ipporthash *hash = (struct ip_set_ipporthash *) set->data;
4623 +
4624 +       /* Release the member array, then the map itself, then drop the pointer */
4625 +       harray_free(hash->members);
4626 +       kfree(hash);
4627 +       set->data = NULL;
4628 +}
4629 +/* flush - harray_flush() the whole member array and reset the element count. */
4630 +static void flush(struct ip_set *set)
4631 +{
4632 +       struct ip_set_ipporthash *hash = (struct ip_set_ipporthash *) set->data;
4633 +       harray_flush(hash->members, hash->hashsize, sizeof(ip_set_ip_t));
4634 +       hash->elements = 0;
4635 +}
4636 +
4637 +static void list_header(const struct ip_set *set, void *data)
4638 +{
4639 +       struct ip_set_ipporthash *hash = (struct ip_set_ipporthash *) set->data;
4640 +       struct ip_set_req_ipporthash_create *out =
4641 +           (struct ip_set_req_ipporthash_create *) data;
4642 +       /* Report the parameters in the struct layout that create() reads */
4643 +       out->from = hash->first_ip;
4644 +       out->to = hash->last_ip;
4645 +       out->hashsize = hash->hashsize;
4646 +       out->probes = hash->probes;
4647 +       out->resize = hash->resize;
4648 +}
4649 +
4650 +static int list_members_size(const struct ip_set *set)
4651 +{
4652 +       struct ip_set_ipporthash *hash = (struct ip_set_ipporthash *) set->data;
4653 +       /* Bytes written by list_members(): one ip_set_ip_t per hash slot */
4654 +       return hash->hashsize * sizeof(ip_set_ip_t);
4655 +}
4656 +
4657 +static void list_members(const struct ip_set *set, void *data)
4658 +{
4659 +       struct ip_set_ipporthash *hash = (struct ip_set_ipporthash *) set->data;
4660 +       ip_set_ip_t *out = (ip_set_ip_t *) data;        /* one word per slot */
4661 +       ip_set_ip_t slot, *elem;
4662 +       for (slot = 0; slot < hash->hashsize; slot++) {
4663 +               elem = HARRAY_ELEM(hash->members, ip_set_ip_t *, slot);
4664 +               out[slot] = *elem;      /* empty slots are copied too */
4665 +       }
4666 +}
4667 +
4668 +static struct ip_set_type ip_set_ipporthash = {
4669 +       .typename               = SETTYPE_NAME,
4670 +       .features               = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_DATA_DOUBLE,
4671 +       .protocol_version       = IP_SET_PROTOCOL_VERSION,
4672 +       .create                 = create,
4673 +       .destroy                = destroy,
4674 +       .flush                  = flush,
4675 +       .reqsize                = sizeof(struct ip_set_req_ipporthash),
4676 +       .addip                  = addip,
4677 +       .addip_kernel           = addip_kernel,
4678 +       .retry                  = retry,        /* grows the hash array */
4679 +       .delip                  = delip,
4680 +       .delip_kernel           = delip_kernel,
4681 +       .testip                 = testip,
4682 +       .testip_kernel          = testip_kernel,
4683 +       .header_size            = sizeof(struct ip_set_req_ipporthash_create),
4684 +       .list_header            = list_header,
4685 +       .list_members_size      = list_members_size,
4686 +       .list_members           = list_members,
4687 +       .me                     = THIS_MODULE,
4688 +};
4689 +/* Module metadata; "limit" is exposed as an int module parameter with mode 0600. */
4690 +MODULE_LICENSE("GPL");
4691 +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
4692 +MODULE_DESCRIPTION("ipporthash type of IP sets");
4693 +module_param(limit, int, 0600);
4694 +MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
4695 +/* Module load hook: registers the set type; the result is returned as the init status. */
4696 +static int __init ip_set_ipporthash_init(void)
4697 +{
4698 +       return ip_set_register_set_type(&ip_set_ipporthash);
4699 +}
4700 +/* Module unload hook: unregisters the set type again. */
4701 +static void __exit ip_set_ipporthash_fini(void)
4702 +{
4703 +       /* FIXME: possible race with ip_set_create() */
4704 +       ip_set_unregister_set_type(&ip_set_ipporthash);
4705 +}
4706 +
4707 +module_init(ip_set_ipporthash_init);
4708 +module_exit(ip_set_ipporthash_fini);
4709 diff --git a/net/ipv4/netfilter/ip_set_iptree.c b/net/ipv4/netfilter/ip_set_iptree.c
4710 new file mode 100644
4711 index 0000000..0180534
4712 --- /dev/null
4713 +++ b/net/ipv4/netfilter/ip_set_iptree.c
4714 @@ -0,0 +1,612 @@
4715 +/* Copyright (C) 2005 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4716 + *
4717 + * This program is free software; you can redistribute it and/or modify
4718 + * it under the terms of the GNU General Public License version 2 as
4719 + * published by the Free Software Foundation.  
4720 + */
4721 +
4722 +/* Kernel module implementing an IP set type: the iptree type */
4723 +
4724 +#include <linux/version.h>
4725 +#include <linux/module.h>
4726 +#include <linux/ip.h>
4727 +#include <linux/skbuff.h>
4728 +#include <linux/slab.h>
4729 +#include <linux/delay.h>
4730 +#include <linux/netfilter_ipv4/ip_tables.h>
4731 +#include <linux/netfilter_ipv4/ip_set.h>
4732 +#include <linux/errno.h>
4733 +#include <asm/uaccess.h>
4734 +#include <asm/bitops.h>
4735 +#include <linux/spinlock.h>
4736 +
4737 +/* Backward compatibility */
4738 +#ifndef __nocast
4739 +#define __nocast
4740 +#endif
4741 +
4742 +#include <linux/netfilter_ipv4/ip_set_iptree.h>
4743 +
4744 +static int limit = MAX_RANGE;   /* __addip() refuses new entries once map->elements reaches this */
4745 +
4746 +/* Garbage collection interval in seconds (parenthesized so it expands safely in any expression): */
4747 +#define IPTREE_GC_TIME         (5*60)
4748 +/* Sleep so many milliseconds before trying again
4749 + * to delete the gc timer at destroying/flushing a set */
4750 +#define IPTREE_DESTROY_SLEEP   100
4751 +/* Slab caches: branch_cachep serves the b- and c-level tree nodes, leaf_cachep the d-level expiry tables. */
4752 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
4753 +static struct kmem_cache *branch_cachep;
4754 +static struct kmem_cache *leaf_cachep;
4755 +#else
4756 +static kmem_cache_t *branch_cachep;
4757 +static kmem_cache_t *leaf_cachep;
4758 +#endif
4759 +
4760 +#if defined(__LITTLE_ENDIAN)
4761 +#define ABCD(a,b,c,d,addrp) do {               \
4762 +       a = ((unsigned char *)addrp)[3];        \
4763 +       b = ((unsigned char *)addrp)[2];        \
4764 +       c = ((unsigned char *)addrp)[1];        \
4765 +       d = ((unsigned char *)addrp)[0];        \
4766 +} while (0)
4767 +#elif defined(__BIG_ENDIAN)
4768 +#define ABCD(a,b,c,d,addrp) do {               \
4769 +       a = ((unsigned char *)addrp)[0];        \
4770 +       b = ((unsigned char *)addrp)[1];        \
4771 +       c = ((unsigned char *)addrp)[2];        \
4772 +       d = ((unsigned char *)addrp)[3];        \
4773 +} while (0)
4774 +#else
4775 +#error "Please fix asm/byteorder.h"
4776 +#endif /* __LITTLE_ENDIAN */
4777 +
4778 +#define TESTIP_WALK(map, elem, branch) do {    \
4779 +       if ((map)->tree[elem]) {                \
4780 +               branch = (map)->tree[elem];     \
4781 +       } else                                  \
4782 +               return 0;                       \
4783 +} while (0)
4784 +/* __testip - 1 if ip has a non-zero expiry slot that is still valid, 0 if not, -ERANGE for ip 0. */
4785 +static inline int
4786 +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
4787 +{
4788 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
4789 +       struct ip_set_iptreeb *btree;
4790 +       struct ip_set_iptreec *ctree;
4791 +       struct ip_set_iptreed *dtree;
4792 +       unsigned char a,b,c,d;
4793 +
4794 +       if (!ip)
4795 +               return -ERANGE;
4796 +
4797 +       *hash_ip = ip;
4798 +       ABCD(a, b, c, d, hash_ip);
4799 +       DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout);
4800 +       TESTIP_WALK(map, a, btree);
4801 +       TESTIP_WALK(btree, b, ctree);
4802 +       TESTIP_WALK(ctree, c, dtree);
4803 +       DP("%lu %lu", dtree->expires[d], jiffies);
4804 +       if (!dtree->expires[d])
4805 +               return 0;
4806 +       return !map->timeout || time_after(dtree->expires[d], jiffies);
4807 +}
4808 +
4809 +static int
4810 +testip(struct ip_set *set, const void *data, size_t size,
4811 +       ip_set_ip_t *hash_ip)
4812 +{
4813 +       /* Request payload; used only if its length matches the struct exactly */
4814 +       const struct ip_set_req_iptree *req = data;
4815 +
4816 +       if (size == sizeof(*req))
4817 +               return __testip(set, req->ip, hash_ip);
4818 +
4819 +       ip_set_printk("data length wrong (want %zu, have %zu)",
4820 +                     sizeof(*req),
4821 +                     size);
4822 +       return -EINVAL;
4823 +}
4824 +
4825 +static int
4826 +testip_kernel(struct ip_set *set, 
4827 +             const struct sk_buff *skb,
4828 +             ip_set_ip_t *hash_ip,
4829 +             const u_int32_t *flags,
4830 +             unsigned char index)
4831 +{
4832 +       int res;
4833 +       
4834 +       DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
4835 +          flags[index] & IPSET_SRC ? "SRC" : "DST",
4836 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4837 +          NIPQUAD(ip_hdr(skb)->saddr),
4838 +          NIPQUAD(ip_hdr(skb)->daddr));
4839 +#else
4840 +          NIPQUAD(skb->nh.iph->saddr),
4841 +          NIPQUAD(skb->nh.iph->daddr));
4842 +#endif
4843 +
4844 +       res =  __testip(set,
4845 +                       ntohl(flags[index] & IPSET_SRC 
4846 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4847 +                               ? ip_hdr(skb)->saddr 
4848 +                               : ip_hdr(skb)->daddr),
4849 +#else
4850 +                               ? skb->nh.iph->saddr 
4851 +                               : skb->nh.iph->daddr),
4852 +#endif
4853 +                       hash_ip);
4854 +       return (res < 0 ? 0 : res);
4855 +}
4856 +
4857 +#define ADDIP_WALK(map, elem, branch, type, cachep) do {       \
4858 +       if ((map)->tree[elem]) {                                \
4859 +               DP("found %u", elem);                           \
4860 +               branch = (map)->tree[elem];                     \
4861 +       } else {                                                \
4862 +               branch = (type *)                               \
4863 +                       kmem_cache_alloc(cachep, GFP_ATOMIC);   \
4864 +               if (branch == NULL)                             \
4865 +                       return -ENOMEM;                         \
4866 +               memset(branch, 0, sizeof(*branch));             \
4867 +               (map)->tree[elem] = branch;                     \
4868 +               DP("alloc %u", elem);                           \
4869 +       }                                                       \
4870 +} while (0)    
4871 +/* __addip - insert ip or refresh its expiry; timeout is in seconds.
4872 + * Returns 0 if ip was absent or expired, -EEXIST if it was still valid (the
4873 + * expiry is rewritten anyway), -ERANGE for ip 0 or a full set, -ENOMEM. */
4874 +static inline int
4875 +__addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout,
4876 +       ip_set_ip_t *hash_ip)
4877 +{
4878 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
4879 +       struct ip_set_iptreeb *btree;
4880 +       struct ip_set_iptreec *ctree;
4881 +       struct ip_set_iptreed *dtree;
4882 +       unsigned char a,b,c,d;
4883 +       int ret = 0, fresh;
4884 +
4885 +       if (!ip || map->elements >= limit)
4886 +               return -ERANGE; /* the gc could be called here, but it's probably overkill */
4887 +       *hash_ip = ip;
4888 +       ABCD(a, b, c, d, hash_ip);
4889 +       DP("%u %u %u %u timeout %u", a, b, c, d, timeout);
4890 +       ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep);
4891 +       ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep);
4892 +       ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep);
4893 +       /* A non-zero slot, even an expired one, is already counted in elements */
4894 +       fresh = !dtree->expires[d];
4895 +       if (dtree->expires[d]
4896 +           && (!map->timeout || time_after(dtree->expires[d], jiffies)))
4897 +               ret = -EEXIST;
4898 +       dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1;
4899 +       if (dtree->expires[d] == 0)     /* Lottery: I won! 0 would mean "empty" */
4900 +               dtree->expires[d] = 1;
4901 +       DP("%u %lu", d, dtree->expires[d]);
4902 +       if (fresh)
4903 +               map->elements++;
4904 +       return ret;
4905 +}
4906 +
4907 +static int
4908 +addip(struct ip_set *set, const void *data, size_t size,
4909 +      ip_set_ip_t *hash_ip)
4910 +{
4911 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
4912 +       const struct ip_set_req_iptree *req = data;
4913 +       unsigned int timeout;
4914 +
4915 +       if (size != sizeof(*req)) {
4916 +               ip_set_printk("data length wrong (want %zu, have %zu)",
4917 +                             sizeof(*req),
4918 +                             size);
4919 +               return -EINVAL;
4920 +       }
4921 +       DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout);
4922 +       /* A zero timeout in the request falls back to map->timeout */
4923 +       timeout = req->timeout ? req->timeout : map->timeout;
4924 +       return __addip(set, req->ip, timeout, hash_ip);
4925 +}
4926 +
4927 +static int
4928 +addip_kernel(struct ip_set *set, 
4929 +            const struct sk_buff *skb,
4930 +            ip_set_ip_t *hash_ip,
4931 +            const u_int32_t *flags,
4932 +            unsigned char index)
4933 +{
4934 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
4935 +
4936 +       return __addip(set,
4937 +                      ntohl(flags[index] & IPSET_SRC 
4938 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
4939 +                               ? ip_hdr(skb)->saddr 
4940 +                               : ip_hdr(skb)->daddr),
4941 +#else
4942 +                               ? skb->nh.iph->saddr 
4943 +                               : skb->nh.iph->daddr),
4944 +#endif
4945 +                      map->timeout,
4946 +                      hash_ip);
4947 +}
4948 +
4949 +#define DELIP_WALK(map, elem, branch) do {     \
4950 +       if ((map)->tree[elem]) {                \
4951 +               branch = (map)->tree[elem];     \
4952 +       } else                                  \
4953 +               return -EEXIST;                 \
4954 +} while (0)
4955 +/* __delip - clear ip's expiry slot: 0 on success, -ERANGE for ip 0,
4956 + * -EEXIST when a tree level is missing or the slot is already zero. */
4957 +static inline int
4958 +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
4959 +{
4960 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
4961 +       struct ip_set_iptreeb *btree;
4962 +       struct ip_set_iptreec *ctree;
4963 +       struct ip_set_iptreed *dtree;
4964 +       unsigned char a,b,c,d;
4965 +
4966 +       if (!ip)
4967 +               return -ERANGE;
4968 +
4969 +       *hash_ip = ip;
4970 +       ABCD(a, b, c, d, hash_ip);
4971 +       DELIP_WALK(map, a, btree);
4972 +       DELIP_WALK(btree, b, ctree);
4973 +       DELIP_WALK(ctree, c, dtree);
4974 +
4975 +       if (!dtree->expires[d])
4976 +               return -EEXIST;
4977 +       dtree->expires[d] = 0;
4978 +       map->elements--;
4979 +       return 0;
4980 +}
4981 +
4982 +static int
4983 +delip(struct ip_set *set, const void *data, size_t size,
4984 +      ip_set_ip_t *hash_ip)
4985 +{
4986 +       /* Request payload; used only if its length matches the struct exactly */
4987 +       const struct ip_set_req_iptree *req = data;
4988 +
4989 +       if (size == sizeof(*req))
4990 +               return __delip(set, req->ip, hash_ip);
4991 +
4992 +       ip_set_printk("data length wrong (want %zu, have %zu)",
4993 +                     sizeof(*req),
4994 +                     size);
4995 +       return -EINVAL;
4996 +}
4997 +
4998 +static int
4999 +delip_kernel(struct ip_set *set, 
5000 +            const struct sk_buff *skb,
5001 +            ip_set_ip_t *hash_ip,
5002 +            const u_int32_t *flags,
5003 +            unsigned char index)
5004 +{
5005 +       return __delip(set,
5006 +                      ntohl(flags[index] & IPSET_SRC 
5007 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
5008 +                               ? ip_hdr(skb)->saddr 
5009 +                               : ip_hdr(skb)->daddr),
5010 +#else
5011 +                               ? skb->nh.iph->saddr 
5012 +                               : skb->nh.iph->daddr),
5013 +#endif
5014 +                      hash_ip);
5015 +}
5016 +/* Loop i over 0..255, skipping NULL (map)->tree[i]; opens a block that LOOP_WALK_END must close. */
5017 +#define LOOP_WALK_BEGIN(map, i, branch) \
5018 +       for (i = 0; i < 256; i++) {     \
5019 +               if (!(map)->tree[i])    \
5020 +                       continue;       \
5021 +               branch = (map)->tree[i]
5022 +/* Closes the loop body opened by LOOP_WALK_BEGIN. */
5023 +#define LOOP_WALK_END }
5024 +
5025 +static void ip_tree_gc(unsigned long ul_set)
5026 +{
5027 +       struct ip_set *set = (void *) ul_set;
5028 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
5029 +       struct ip_set_iptreeb *btree;
5030 +       struct ip_set_iptreec *ctree;
5031 +       struct ip_set_iptreed *dtree;
5032 +       unsigned int a,b,c,d;
5033 +       unsigned char i,j,k;
5034 +
5035 +       i = j = k = 0;
5036 +       DP("gc: %s", set->name);
5037 +       write_lock_bh(&set->lock);
5038 +       LOOP_WALK_BEGIN(map, a, btree);
5039 +       LOOP_WALK_BEGIN(btree, b, ctree);
5040 +       LOOP_WALK_BEGIN(ctree, c, dtree);
5041 +       for (d = 0; d < 256; d++) {
5042 +               if (dtree->expires[d]) {
5043 +                       DP("gc: %u %u %u %u: expires %lu jiffies %lu",
5044 +                           a, b, c, d,
5045 +                           dtree->expires[d], jiffies);
5046 +                       if (map->timeout
5047 +                           && time_before(dtree->expires[d], jiffies)) {
5048 +                               dtree->expires[d] = 0;
5049 +                               map->elements--;
5050 +                       } else
5051 +                               k = 1;
5052 +               }
5053 +       }
5054 +       if (k == 0) {
5055 +               DP("gc: %s: leaf %u %u %u empty",
5056 +                   set->name, a, b, c);
5057 +               kmem_cache_free(leaf_cachep, dtree);
5058 +               ctree->tree[c] = NULL;
5059 +       } else {
5060 +               DP("gc: %s: leaf %u %u %u not empty",
5061 +                   set->name, a, b, c);
5062 +               j = 1;
5063 +               k = 0;
5064 +       }
5065 +       LOOP_WALK_END;
5066 +       if (j == 0) {
5067 +               DP("gc: %s: branch %u %u empty",
5068 +                   set->name, a, b);
5069 +               kmem_cache_free(branch_cachep, ctree);
5070 +               btree->tree[b] = NULL;
5071 +       } else {
5072 +               DP("gc: %s: branch %u %u not empty",
5073 +                   set->name, a, b);
5074 +               i = 1;
5075 +               j = k = 0;
5076 +       }
5077 +       LOOP_WALK_END;
5078 +       if (i == 0) {
5079 +               DP("gc: %s: branch %u empty",
5080 +                   set->name, a);
5081 +               kmem_cache_free(branch_cachep, btree);
5082 +               map->tree[a] = NULL;
5083 +       } else {
5084 +               DP("gc: %s: branch %u not empty",
5085 +                   set->name, a);
5086 +               i = j = k = 0;
5087 +       }
5088 +       LOOP_WALK_END;
5089 +       write_unlock_bh(&set->lock);
5090 +       
5091 +       map->gc.expires = jiffies + map->gc_interval * HZ;
5092 +       add_timer(&map->gc);
5093 +}
5094 +
5095 +static inline void init_gc_timer(struct ip_set *set)
5096 +{
5097 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
5098 +
5099 +       /* Even if there is no timeout for the entries,
5100 +        * we still have to call gc because delete
5101 +        * do not clean up empty branches */
5102 +       map->gc_interval = IPTREE_GC_TIME;
5103 +       init_timer(&map->gc);
5104 +       map->gc.data = (unsigned long) set;
5105 +       map->gc.function = ip_tree_gc;
5106 +       map->gc.expires = jiffies + map->gc_interval * HZ;
5107 +       add_timer(&map->gc);
5108 +}
5109 +
5110 +static int create(struct ip_set *set, const void *data, size_t size)
5111 +{
5112 +       struct ip_set_req_iptree_create *req =
5113 +           (struct ip_set_req_iptree_create *) data;
5114 +       struct ip_set_iptree *map;
5115 +
5116 +       if (size != sizeof(struct ip_set_req_iptree_create)) {
5117 +               ip_set_printk("data length wrong (want %zu, have %zu)",
5118 +                             sizeof(struct ip_set_req_iptree_create),
5119 +                             size);
5120 +               return -EINVAL;
5121 +       }
5122 +
5123 +       map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL);
5124 +       if (!map) {
5125 +               DP("out of memory for %d bytes",
5126 +                  sizeof(struct ip_set_iptree));
5127 +               return -ENOMEM;
5128 +       }
5129 +       memset(map, 0, sizeof(*map));
5130 +       map->timeout = req->timeout;
5131 +       map->elements = 0;
5132 +       set->data = map;
5133 +
5134 +       init_gc_timer(set);
5135 +
5136 +       return 0;
5137 +}
5138 +
5139 +static void __flush(struct ip_set_iptree *map)
5140 +{
5141 +       struct ip_set_iptreeb *btree;
5142 +       struct ip_set_iptreec *ctree;
5143 +       struct ip_set_iptreed *dtree;
5144 +       unsigned int a,b,c;
5145 +
5146 +       LOOP_WALK_BEGIN(map, a, btree);
5147 +       LOOP_WALK_BEGIN(btree, b, ctree);
5148 +       LOOP_WALK_BEGIN(ctree, c, dtree);
5149 +       kmem_cache_free(leaf_cachep, dtree);
5150 +       LOOP_WALK_END;
5151 +       kmem_cache_free(branch_cachep, ctree);
5152 +       LOOP_WALK_END;
5153 +       kmem_cache_free(branch_cachep, btree);
5154 +       LOOP_WALK_END;
5155 +       map->elements = 0;
5156 +}
5157 +
5158 +static void destroy(struct ip_set *set)
5159 +{
5160 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
5161 +
5162 +       /* gc might be running */
5163 +       while (!del_timer(&map->gc))
5164 +               msleep(IPTREE_DESTROY_SLEEP);
5165 +       __flush(map);
5166 +       kfree(map);
5167 +       set->data = NULL;
5168 +}
5169 +
5170 +static void flush(struct ip_set *set)
5171 +{
5172 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
5173 +       unsigned int timeout = map->timeout;
5174 +       
5175 +       /* gc might be running */
5176 +       while (!del_timer(&map->gc))
5177 +               msleep(IPTREE_DESTROY_SLEEP);
5178 +       __flush(map);
5179 +       memset(map, 0, sizeof(*map));
5180 +       map->timeout = timeout;
5181 +
5182 +       init_gc_timer(set);
5183 +}
5184 +
5185 +static void list_header(const struct ip_set *set, void *data)
5186 +{
5187 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
5188 +       struct ip_set_req_iptree_create *header =
5189 +           (struct ip_set_req_iptree_create *) data;
5190 +
5191 +       header->timeout = map->timeout;
5192 +}
5193 +
5194 +static int list_members_size(const struct ip_set *set)
5195 +{
5196 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
5197 +       struct ip_set_iptreeb *btree;
5198 +       struct ip_set_iptreec *ctree;
5199 +       struct ip_set_iptreed *dtree;
5200 +       unsigned int a,b,c,d;
5201 +       unsigned int count = 0;
5202 +
5203 +       LOOP_WALK_BEGIN(map, a, btree);
5204 +       LOOP_WALK_BEGIN(btree, b, ctree);
5205 +       LOOP_WALK_BEGIN(ctree, c, dtree);
5206 +       for (d = 0; d < 256; d++) {
5207 +               if (dtree->expires[d]
5208 +                   && (!map->timeout || time_after(dtree->expires[d], jiffies)))
5209 +                       count++;
5210 +       }
5211 +       LOOP_WALK_END;
5212 +       LOOP_WALK_END;
5213 +       LOOP_WALK_END;
5214 +
5215 +       DP("members %u", count);
5216 +       return (count * sizeof(struct ip_set_req_iptree));
5217 +}
5218 +
5219 +static void list_members(const struct ip_set *set, void *data)
5220 +{
5221 +       struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
5222 +       struct ip_set_iptreeb *btree;
5223 +       struct ip_set_iptreec *ctree;
5224 +       struct ip_set_iptreed *dtree;
5225 +       unsigned int a,b,c,d;
5226 +       size_t offset = 0;
5227 +       struct ip_set_req_iptree *entry;
5228 +
5229 +       LOOP_WALK_BEGIN(map, a, btree);
5230 +       LOOP_WALK_BEGIN(btree, b, ctree);
5231 +       LOOP_WALK_BEGIN(ctree, c, dtree);
5232 +       for (d = 0; d < 256; d++) {
5233 +               if (dtree->expires[d]
5234 +                   && (!map->timeout || time_after(dtree->expires[d], jiffies))) {
5235 +                       entry = (struct ip_set_req_iptree *)(data + offset);
5236 +                       entry->ip = ((a << 24) | (b << 16) | (c << 8) | d);
5237 +                       entry->timeout = !map->timeout ? 0 
5238 +                               : (dtree->expires[d] - jiffies)/HZ;
5239 +                       offset += sizeof(struct ip_set_req_iptree);
5240 +               }
5241 +       }
5242 +       LOOP_WALK_END;
5243 +       LOOP_WALK_END;
5244 +       LOOP_WALK_END;
5245 +}
5246 +
5247 +static struct ip_set_type ip_set_iptree = {
5248 +       .typename               = SETTYPE_NAME,
5249 +       .features               = IPSET_TYPE_IP | IPSET_DATA_SINGLE,
5250 +       .protocol_version       = IP_SET_PROTOCOL_VERSION,
5251 +       .create                 = &create,
5252 +       .destroy                = &destroy,
5253 +       .flush                  = &flush,
5254 +       .reqsize                = sizeof(struct ip_set_req_iptree),
5255 +       .addip                  = &addip,
5256 +       .addip_kernel           = &addip_kernel,
5257 +       .delip                  = &delip,
5258 +       .delip_kernel           = &delip_kernel,
5259 +       .testip                 = &testip,
5260 +       .testip_kernel          = &testip_kernel,
5261 +       .header_size            = sizeof(struct ip_set_req_iptree_create),
5262 +       .list_header            = &list_header,
5263 +       .list_members_size      = &list_members_size,
5264 +       .list_members           = &list_members,
5265 +       .me                     = THIS_MODULE,
5266 +};
5267 +
5268 +MODULE_LICENSE("GPL");
5269 +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
5270 +MODULE_DESCRIPTION("iptree type of IP sets");
5271 +module_param(limit, int, 0600);
5272 +MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
5273 +
5274 +static int __init ip_set_iptree_init(void)
5275 +{
5276 +       int ret;
5277 +       
5278 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
5279 +       branch_cachep = kmem_cache_create("ip_set_iptreeb",
5280 +                               sizeof(struct ip_set_iptreeb),
5281 +                               0, 0, NULL);
5282 +#else
5283 +       branch_cachep = kmem_cache_create("ip_set_iptreeb",
5284 +                               sizeof(struct ip_set_iptreeb),
5285 +                               0, 0, NULL, NULL);
5286 +#endif
5287 +       if (!branch_cachep) {
5288 +               printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n");
5289 +               ret = -ENOMEM;
5290 +               goto out;
5291 +       }
5292 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
5293 +       leaf_cachep = kmem_cache_create("ip_set_iptreed",
5294 +                               sizeof(struct ip_set_iptreed),
5295 +                               0, 0, NULL);
5296 +#else
5297 +       leaf_cachep = kmem_cache_create("ip_set_iptreed",
5298 +                               sizeof(struct ip_set_iptreed),
5299 +                               0, 0, NULL, NULL);
5300 +#endif
5301 +       if (!leaf_cachep) {
5302 +               printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n");
5303 +               ret = -ENOMEM;
5304 +               goto free_branch;
5305 +       }
5306 +       ret = ip_set_register_set_type(&ip_set_iptree);
5307 +       if (ret == 0)
5308 +               goto out;
5309 +
5310 +       kmem_cache_destroy(leaf_cachep);
5311 +    free_branch:       
5312 +       kmem_cache_destroy(branch_cachep);
5313 +    out:
5314 +       return ret;
5315 +}
5316 +
5317 +static void __exit ip_set_iptree_fini(void)
5318 +{
5319 +       /* FIXME: possible race with ip_set_create() */
5320 +       ip_set_unregister_set_type(&ip_set_iptree);
5321 +       kmem_cache_destroy(leaf_cachep);
5322 +       kmem_cache_destroy(branch_cachep);
5323 +}
5324 +
5325 +module_init(ip_set_iptree_init);
5326 +module_exit(ip_set_iptree_fini);
5327 diff --git a/net/ipv4/netfilter/ip_set_iptreemap.c b/net/ipv4/netfilter/ip_set_iptreemap.c
5328 new file mode 100644
5329 index 0000000..3825055
5330 --- /dev/null
5331 +++ b/net/ipv4/netfilter/ip_set_iptreemap.c
5332 @@ -0,0 +1,829 @@
5333 +/* Copyright (C) 2007 Sven Wegener <sven.wegener@stealer.net>
5334 + *
5335 + * This program is free software; you can redistribute it and/or modify it
5336 + * under the terms of the GNU General Public License version 2 as published by
5337 + * the Free Software Foundation.
5338 + */
5339 +
5340 +/* This module implements the iptreemap ipset type. It uses bitmaps to
5341 + * represent every single IPv4 address as a single bit. The bitmaps are managed
5342 + * in a tree structure, where the first three octets of an address are used
5343 + * as an index to find the bitmap and the last octet is used as the bit number.
5344 + */
5345 +
5346 +#include <linux/version.h>
5347 +#include <linux/module.h>
5348 +#include <linux/ip.h>
5349 +#include <linux/skbuff.h>
5350 +#include <linux/slab.h>
5351 +#include <linux/delay.h>
5352 +#include <linux/netfilter_ipv4/ip_tables.h>
5353 +#include <linux/netfilter_ipv4/ip_set.h>
5354 +#include <linux/errno.h>
5355 +#include <asm/uaccess.h>
5356 +#include <asm/bitops.h>
5357 +#include <linux/spinlock.h>
5358 +
5359 +#include <linux/netfilter_ipv4/ip_set_iptreemap.h>
5360 +
5361 +#define IPTREEMAP_DEFAULT_GC_TIME (5 * 60)
5362 +#define IPTREEMAP_DESTROY_SLEEP (100)
5363 +
5364 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
5365 +static struct kmem_cache *cachep_b;
5366 +static struct kmem_cache *cachep_c;
5367 +static struct kmem_cache *cachep_d;
5368 +#else
5369 +static kmem_cache_t *cachep_b;
5370 +static kmem_cache_t *cachep_c;
5371 +static kmem_cache_t *cachep_d;
5372 +#endif
5373 +
5374 +static struct ip_set_iptreemap_d *fullbitmap_d;
5375 +static struct ip_set_iptreemap_c *fullbitmap_c;
5376 +static struct ip_set_iptreemap_b *fullbitmap_b;
5377 +
/*
 * ABCD(a, b, c, d, addrp): split the 32-bit host-order address that
 * addrp points to into its four octets, most significant octet in 'a'.
 *
 * Every macro argument is parenthesized so that pointer expressions
 * such as 'base + 1' are cast as a whole instead of being cast first
 * and offset by bytes afterwards.
 */
#if defined(__LITTLE_ENDIAN)
#define ABCD(a, b, c, d, addrp) \
	do { \
		(a) = ((unsigned char *)(addrp))[3]; \
		(b) = ((unsigned char *)(addrp))[2]; \
		(c) = ((unsigned char *)(addrp))[1]; \
		(d) = ((unsigned char *)(addrp))[0]; \
	} while (0)
#elif defined(__BIG_ENDIAN)
#define ABCD(a, b, c, d, addrp) \
	do { \
		(a) = ((unsigned char *)(addrp))[0]; \
		(b) = ((unsigned char *)(addrp))[1]; \
		(c) = ((unsigned char *)(addrp))[2]; \
		(d) = ((unsigned char *)(addrp))[3]; \
	} while (0)
#else
#error "Please fix asm/byteorder.h"
#endif /* __LITTLE_ENDIAN */
5396 +
/*
 * One step down the tree for a membership test: load slot 'elem' of
 * (map)->tree into 'branch'.  Makes the enclosing function return 0
 * when the slot is empty and 1 when it holds the 'full' sentinel;
 * otherwise execution continues with 'branch' set.
 */
#define TESTIP_WALK(map, elem, branch, full)		\
	do {						\
		branch = (map)->tree[elem];		\
		if (!branch)				\
			return 0;			\
		if (branch == full)			\
			return 1;			\
	} while (0)
5405 +
/*
 * One step down the tree when adding a single address: load slot
 * 'elem' of (map)->tree into 'branch'.  An empty slot gets a freshly
 * allocated, zeroed node from 'cachep' (GFP_ATOMIC); the enclosing
 * function returns -ENOMEM when that allocation fails and -EEXIST when
 * the slot already holds the 'full' sentinel.
 */
#define ADDIP_WALK(map, elem, branch, type, cachep, full)			\
	do {									\
		branch = (map)->tree[elem];					\
		if (!branch) {							\
			branch = (type *) kmem_cache_alloc(cachep, GFP_ATOMIC);	\
			if (!branch)						\
				return -ENOMEM;					\
			memset(branch, 0, sizeof(*branch));			\
			(map)->tree[elem] = branch;				\
		} else if (branch == full) {					\
			return -EEXIST;						\
		}								\
	} while (0)
5419 +
/*
 * Open a loop over the slots a1..a2 of (map)->tree for a range add.
 *
 * Slots already holding 'full' are skipped.  A slot strictly inside
 * the range, or any slot for which 'hint' is true, is replaced by the
 * 'full' sentinel (an existing node is released with 'free') and the
 * loop moves on.  For the remaining slots a missing node is allocated
 * from 'cachep' and zeroed (the enclosing function returns -ENOMEM on
 * failure), and the block following the macro is executed with
 * 'branch' set.  Must be closed with ADDIP_RANGE_LOOP_END().
 */
#define ADDIP_RANGE_LOOP(map, a, a1, a2, hint, branch, full, cachep, free)	\
	for (a = a1; a <= a2; a++) {						\
		branch = (map)->tree[a];					\
		if (branch != full) {						\
			if ((a > a1 && a < a2) || (hint)) {			\
				if (branch)					\
					free(branch);				\
				(map)->tree[a] = full;				\
				continue;					\
			} else if (!branch) {					\
				branch = kmem_cache_alloc(cachep, GFP_ATOMIC);	\
				if (!branch)					\
					return -ENOMEM;				\
				memset(branch, 0, sizeof(*branch));		\
				(map)->tree[a] = branch;			\
			}

/* Close a loop opened with ADDIP_RANGE_LOOP. */
#define ADDIP_RANGE_LOOP_END()	\
		}		\
	}
5440 +
/*
 * One step down the tree when deleting a single address: load slot
 * 'elem' of (map)->tree into 'branch'.  The enclosing function returns
 * -EEXIST when the slot is empty.  A slot holding the 'full' sentinel
 * is replaced by a private copy of it allocated from 'cachep' with the
 * given allocation 'flags' (-ENOMEM is returned on failure), so that
 * the copy can be modified.
 */
#define DELIP_WALK(map, elem, branch, cachep, full, flags)	\
	do {							\
		branch = (map)->tree[elem];			\
		if (!branch) {					\
			return -EEXIST;				\
		} else if (branch == full) {			\
			branch = kmem_cache_alloc(cachep, flags); \
			if (!branch)				\
				return -ENOMEM;			\
			memcpy(branch, full, sizeof(*full));	\
			(map)->tree[elem] = branch;		\
		}						\
	} while (0)
5454 +
/*
 * Open a loop over the slots a1..a2 of (map)->tree for a range delete.
 *
 * Empty slots are skipped.  A slot strictly inside the range, or any
 * slot for which 'hint' is true, is emptied (a node other than the
 * 'full' sentinel is released with 'free') and the loop moves on.  For
 * the remaining slots a 'full' sentinel is replaced by a private copy
 * allocated from 'cachep' with 'flags' (the enclosing function returns
 * -ENOMEM on failure), and the block following the macro is executed
 * with 'branch' set.  Must be closed with DELIP_RANGE_LOOP_END().
 */
#define DELIP_RANGE_LOOP(map, a, a1, a2, hint, branch, full, cachep, free, flags) \
	for (a = a1; a <= a2; a++) {						\
		branch = (map)->tree[a];					\
		if (branch) {							\
			if ((a > a1 && a < a2) || (hint)) {			\
				if (branch != full)				\
					free(branch);				\
				(map)->tree[a] = NULL;				\
				continue;					\
			} else if (branch == full) {				\
				branch = kmem_cache_alloc(cachep, flags);	\
				if (!branch)					\
					return -ENOMEM;				\
				memcpy(branch, full, sizeof(*branch));		\
				(map)->tree[a] = branch;			\
			}

/* Close a loop opened with DELIP_RANGE_LOOP. */
#define DELIP_RANGE_LOOP_END()	\
		}		\
	}
5475 +
/*
 * Open a loop over all 256 slots of (map)->tree, skipping empty slots;
 * the current index is in 'i' and the child pointer in 'branch'.  Note
 * that slots holding a "full" sentinel are NOT skipped.  Must be
 * closed with LOOP_WALK_END().
 */
#define LOOP_WALK_BEGIN(map, i, branch)		\
	for (i = 0; i < 256; i++) {		\
		branch = (map)->tree[i];	\
		if (likely(!branch))		\
			continue;

/* Close a loop opened with LOOP_WALK_BEGIN or LOOP_WALK_BEGIN_GC. */
#define LOOP_WALK_END()	\
	}
5484 +
/*
 * Garbage-collection walk over the 256 slots of (map)->tree.
 *
 * 'count' starts at -256 and is raised by one for every populated slot
 * and by one more for every slot holding the 'full' sentinel, so it
 * stays at -256 when all slots are empty and reaches 256 when all
 * slots are full.  Empty and full slots are skipped; the loop body
 * only sees ordinary nodes.
 */
#define LOOP_WALK_BEGIN_GC(map, i, branch, full, cachep, count)	\
	count = -256;						\
	for (i = 0; i < 256; i++) {				\
		branch = (map)->tree[i];			\
		if (likely(!branch))				\
			continue;				\
		count++;					\
		if (branch == full) {				\
			count++;				\
			continue;				\
		}

/*
 * Close a garbage-collection loop and collapse the current node:
 * when 'count' says it is completely empty (-256) it is freed and its
 * slot cleared, when it is completely full (256) it is freed and its
 * slot replaced by the 'full' sentinel.
 */
#define LOOP_WALK_END_GC(map, i, branch, full, cachep, count)	\
		if (-256 == count) {				\
			kmem_cache_free(cachep, branch);	\
			(map)->tree[i] = NULL;			\
		} else if (256 == count) {			\
			kmem_cache_free(cachep, branch);	\
			(map)->tree[i] = full;			\
		}						\
	}
5506 +
/*
 * Open a loop over the 256 slots of (map)->tree that also tracks range
 * boundaries: whenever an empty slot is met while 'inrange' is set,
 * 'count' is incremented and 'inrange' cleared.  Empty slots are then
 * skipped; for populated slots the child is left in 'branch' and the
 * loop body runs.  Must be closed with LOOP_WALK_END_COUNT().
 */
#define LOOP_WALK_BEGIN_COUNT(map, i, branch, inrange, count)	\
	for (i = 0; i < 256; i++) {				\
		if (!(map)->tree[i]) {				\
			if (inrange) {				\
				count++;			\
				inrange = 0;			\
			}					\
			continue;				\
		}						\
		branch = (map)->tree[i];

/* Close a loop opened with LOOP_WALK_BEGIN_COUNT. */
#define LOOP_WALK_END_COUNT()	\
	}
5520 +
/*
 * Smaller / larger of two values.  Arguments and the whole expansion
 * are parenthesized so that operator expressions can be passed safely;
 * each argument may still be evaluated twice, so avoid side effects.
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
5523 +
/*
 * Boundary helpers for the range loops: yield the boundary value (b1,
 * c1 or d1) when the current prefix (a / a,b / a,b,c) equals the
 * boundary prefix (a1 / a1,b1 / a1,b1,c1), and the default 'r'
 * otherwise.  All arguments are parenthesized so that expressions can
 * be passed without precedence surprises.
 */
#define GETVALUE1(a, a1, b1, r) \
	((a) == (a1) ? (b1) : (r))

#define GETVALUE2(a, b, a1, b1, c1, r) \
	((a) == (a1) && (b) == (b1) ? (c1) : (r))

#define GETVALUE3(a, b, c, a1, b1, c1, d1, r) \
	((a) == (a1) && (b) == (b1) && (c) == (c1) ? (d1) : (r))
5532 +
/*
 * "Whole subtree covered" tests for the range loops.  With the range
 * running from a1.b1.c1.d1 to a2.b2.c2.d2, CHECKn is true when every
 * remaining lower bound below the current prefix is 0 and every
 * remaining upper bound is 255, i.e. when the node at the current
 * prefix lies completely inside the range.  Directly compared
 * arguments are parenthesized so that expressions can be passed.
 */
#define CHECK1(a, a1, a2, b1, b2, c1, c2, d1, d2) \
	( \
		GETVALUE1(a, a1, b1, 0) == 0 \
		&& GETVALUE1(a, a2, b2, 255) == 255 \
		&& (c1) == 0 \
		&& (c2) == 255 \
		&& (d1) == 0 \
		&& (d2) == 255 \
	)

#define CHECK2(a, b, a1, a2, b1, b2, c1, c2, d1, d2) \
	( \
		GETVALUE2(a, b, a1, b1, c1, 0) == 0 \
		&& GETVALUE2(a, b, a2, b2, c2, 255) == 255 \
		&& (d1) == 0 \
		&& (d2) == 255 \
	)

#define CHECK3(a, b, c, a1, a2, b1, b2, c1, c2, d1, d2) \
	( \
		GETVALUE3(a, b, c, a1, b1, c1, d1, 0) == 0 \
		&& GETVALUE3(a, b, c, a2, b2, c2, d2, 255) == 255 \
	)
5556 +
5557 +
5558 +static inline void
5559 +free_d(struct ip_set_iptreemap_d *map)
5560 +{
5561 +       kmem_cache_free(cachep_d, map);
5562 +}
5563 +
5564 +static inline void
5565 +free_c(struct ip_set_iptreemap_c *map)
5566 +{
5567 +       struct ip_set_iptreemap_d *dtree;
5568 +       unsigned int i;
5569 +
5570 +       LOOP_WALK_BEGIN(map, i, dtree) {
5571 +               if (dtree != fullbitmap_d)
5572 +                       free_d(dtree);
5573 +       } LOOP_WALK_END();
5574 +
5575 +       kmem_cache_free(cachep_c, map);
5576 +}
5577 +
5578 +static inline void
5579 +free_b(struct ip_set_iptreemap_b *map)
5580 +{
5581 +       struct ip_set_iptreemap_c *ctree;
5582 +       unsigned int i;
5583 +
5584 +       LOOP_WALK_BEGIN(map, i, ctree) {
5585 +               if (ctree != fullbitmap_c)
5586 +                       free_c(ctree);
5587 +       } LOOP_WALK_END();
5588 +
5589 +       kmem_cache_free(cachep_b, map);
5590 +}
5591 +
5592 +static inline int
5593 +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
5594 +{
5595 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5596 +       struct ip_set_iptreemap_b *btree;
5597 +       struct ip_set_iptreemap_c *ctree;
5598 +       struct ip_set_iptreemap_d *dtree;
5599 +       unsigned char a, b, c, d;
5600 +
5601 +       *hash_ip = ip;
5602 +
5603 +       ABCD(a, b, c, d, hash_ip);
5604 +
5605 +       TESTIP_WALK(map, a, btree, fullbitmap_b);
5606 +       TESTIP_WALK(btree, b, ctree, fullbitmap_c);
5607 +       TESTIP_WALK(ctree, c, dtree, fullbitmap_d);
5608 +
5609 +       return !!test_bit(d, (void *) dtree->bitmap);
5610 +}
5611 +
5612 +static int
5613 +testip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip)
5614 +{
5615 +       struct ip_set_req_iptreemap *req = (struct ip_set_req_iptreemap *) data;
5616 +
5617 +       if (size != sizeof(struct ip_set_req_iptreemap)) {
5618 +               ip_set_printk("data length wrong (want %zu, have %zu)", sizeof(struct ip_set_req_iptreemap), size);
5619 +               return -EINVAL;
5620 +       }
5621 +
5622 +       return __testip(set, req->start, hash_ip);
5623 +}
5624 +
5625 +static int
5626 +testip_kernel(struct ip_set *set, const struct sk_buff *skb, ip_set_ip_t *hash_ip, const u_int32_t *flags, unsigned char index)
5627 +{
5628 +       int res;
5629 +
5630 +       res = __testip(set, 
5631 +                      ntohl(flags[index] & IPSET_SRC 
5632 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
5633 +                               ? ip_hdr(skb)->saddr 
5634 +                               : ip_hdr(skb)->daddr),
5635 +#else
5636 +                               ? skb->nh.iph->saddr 
5637 +                               : skb->nh.iph->daddr),
5638 +#endif
5639 +                      hash_ip);
5640 +
5641 +       return (res < 0 ? 0 : res);
5642 +}
5643 +
5644 +static inline int
5645 +__addip_single(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
5646 +{
5647 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5648 +       struct ip_set_iptreemap_b *btree;
5649 +       struct ip_set_iptreemap_c *ctree;
5650 +       struct ip_set_iptreemap_d *dtree;
5651 +       unsigned char a, b, c, d;
5652 +
5653 +       *hash_ip = ip;
5654 +
5655 +       ABCD(a, b, c, d, hash_ip);
5656 +
5657 +       ADDIP_WALK(map, a, btree, struct ip_set_iptreemap_b, cachep_b, fullbitmap_b);
5658 +       ADDIP_WALK(btree, b, ctree, struct ip_set_iptreemap_c, cachep_c, fullbitmap_c);
5659 +       ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreemap_d, cachep_d, fullbitmap_d);
5660 +
5661 +       if (test_and_set_bit(d, (void *) dtree->bitmap))
5662 +               return -EEXIST;
5663 +
5664 +       set_bit(b, (void *) btree->dirty);
5665 +
5666 +       return 0;
5667 +}
5668 +
5669 +static inline int
5670 +__addip_range(struct ip_set *set, ip_set_ip_t start, ip_set_ip_t end, ip_set_ip_t *hash_ip)
5671 +{
5672 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5673 +       struct ip_set_iptreemap_b *btree;
5674 +       struct ip_set_iptreemap_c *ctree;
5675 +       struct ip_set_iptreemap_d *dtree;
5676 +       unsigned int a, b, c, d;
5677 +       unsigned char a1, b1, c1, d1;
5678 +       unsigned char a2, b2, c2, d2;
5679 +
5680 +       if (start == end)
5681 +               return __addip_single(set, start, hash_ip);
5682 +
5683 +       *hash_ip = start;
5684 +
5685 +       ABCD(a1, b1, c1, d1, &start);
5686 +       ABCD(a2, b2, c2, d2, &end);
5687 +
5688 +       /* This is sooo ugly... */
5689 +       ADDIP_RANGE_LOOP(map, a, a1, a2, CHECK1(a, a1, a2, b1, b2, c1, c2, d1, d2), btree, fullbitmap_b, cachep_b, free_b) {
5690 +               ADDIP_RANGE_LOOP(btree, b, GETVALUE1(a, a1, b1, 0), GETVALUE1(a, a2, b2, 255), CHECK2(a, b, a1, a2, b1, b2, c1, c2, d1, d2), ctree, fullbitmap_c, cachep_c, free_c) {
5691 +                       ADDIP_RANGE_LOOP(ctree, c, GETVALUE2(a, b, a1, b1, c1, 0), GETVALUE2(a, b, a2, b2, c2, 255), CHECK3(a, b, c, a1, a2, b1, b2, c1, c2, d1, d2), dtree, fullbitmap_d, cachep_d, free_d) {
5692 +                               for (d = GETVALUE3(a, b, c, a1, b1, c1, d1, 0); d <= GETVALUE3(a, b, c, a2, b2, c2, d2, 255); d++)
5693 +                                       set_bit(d, (void *) dtree->bitmap);
5694 +                               set_bit(b, (void *) btree->dirty);
5695 +                       } ADDIP_RANGE_LOOP_END();
5696 +               } ADDIP_RANGE_LOOP_END();
5697 +       } ADDIP_RANGE_LOOP_END();
5698 +
5699 +       return 0;
5700 +}
5701 +
5702 +static int
5703 +addip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip)
5704 +{
5705 +       struct ip_set_req_iptreemap *req = (struct ip_set_req_iptreemap *) data;
5706 +
5707 +       if (size != sizeof(struct ip_set_req_iptreemap)) {
5708 +               ip_set_printk("data length wrong (want %zu, have %zu)", sizeof(struct ip_set_req_iptreemap), size);
5709 +               return -EINVAL;
5710 +       }
5711 +
5712 +       return __addip_range(set, MIN(req->start, req->end), MAX(req->start, req->end), hash_ip);
5713 +}
5714 +
5715 +static int
5716 +addip_kernel(struct ip_set *set, const struct sk_buff *skb, ip_set_ip_t *hash_ip, const u_int32_t *flags, unsigned char index)
5717 +{
5718 +
5719 +       return __addip_single(set,
5720 +                       ntohl(flags[index] & IPSET_SRC 
5721 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
5722 +                               ? ip_hdr(skb)->saddr 
5723 +                               : ip_hdr(skb)->daddr),
5724 +#else
5725 +                               ? skb->nh.iph->saddr 
5726 +                               : skb->nh.iph->daddr),
5727 +#endif
5728 +                       hash_ip);
5729 +}
5730 +
5731 +static inline int
5732 +__delip_single(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip, unsigned int __nocast flags)
5733 +{
5734 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5735 +       struct ip_set_iptreemap_b *btree;
5736 +       struct ip_set_iptreemap_c *ctree;
5737 +       struct ip_set_iptreemap_d *dtree;
5738 +       unsigned char a,b,c,d;
5739 +
5740 +       *hash_ip = ip;
5741 +
5742 +       ABCD(a, b, c, d, hash_ip);
5743 +
5744 +       DELIP_WALK(map, a, btree, cachep_b, fullbitmap_b, flags);
5745 +       DELIP_WALK(btree, b, ctree, cachep_c, fullbitmap_c, flags);
5746 +       DELIP_WALK(ctree, c, dtree, cachep_d, fullbitmap_d, flags);
5747 +
5748 +       if (!test_and_clear_bit(d, (void *) dtree->bitmap))
5749 +               return -EEXIST;
5750 +
5751 +       set_bit(b, (void *) btree->dirty);
5752 +
5753 +       return 0;
5754 +}
5755 +
5756 +static inline int
5757 +__delip_range(struct ip_set *set, ip_set_ip_t start, ip_set_ip_t end, ip_set_ip_t *hash_ip, unsigned int __nocast flags)
5758 +{
5759 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5760 +       struct ip_set_iptreemap_b *btree;
5761 +       struct ip_set_iptreemap_c *ctree;
5762 +       struct ip_set_iptreemap_d *dtree;
5763 +       unsigned int a, b, c, d;
5764 +       unsigned char a1, b1, c1, d1;
5765 +       unsigned char a2, b2, c2, d2;
5766 +
5767 +       if (start == end)
5768 +               return __delip_single(set, start, hash_ip, flags);
5769 +
5770 +       *hash_ip = start;
5771 +
5772 +       ABCD(a1, b1, c1, d1, &start);
5773 +       ABCD(a2, b2, c2, d2, &end);
5774 +
5775 +       /* This is sooo ugly... */
5776 +       DELIP_RANGE_LOOP(map, a, a1, a2, CHECK1(a, a1, a2, b1, b2, c1, c2, d1, d2), btree, fullbitmap_b, cachep_b, free_b, flags) {
5777 +               DELIP_RANGE_LOOP(btree, b, GETVALUE1(a, a1, b1, 0), GETVALUE1(a, a2, b2, 255), CHECK2(a, b, a1, a2, b1, b2, c1, c2, d1, d2), ctree, fullbitmap_c, cachep_c, free_c, flags) {
5778 +                       DELIP_RANGE_LOOP(ctree, c, GETVALUE2(a, b, a1, b1, c1, 0), GETVALUE2(a, b, a2, b2, c2, 255), CHECK3(a, b, c, a1, a2, b1, b2, c1, c2, d1, d2), dtree, fullbitmap_d, cachep_d, free_d, flags) {
5779 +                               for (d = GETVALUE3(a, b, c, a1, b1, c1, d1, 0); d <= GETVALUE3(a, b, c, a2, b2, c2, d2, 255); d++)
5780 +                                       clear_bit(d, (void *) dtree->bitmap);
5781 +                               set_bit(b, (void *) btree->dirty);
5782 +                       } DELIP_RANGE_LOOP_END();
5783 +               } DELIP_RANGE_LOOP_END();
5784 +       } DELIP_RANGE_LOOP_END();
5785 +
5786 +       return 0;
5787 +}
5788 +
5789 +static int
5790 +delip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip)
5791 +{
5792 +       struct ip_set_req_iptreemap *req = (struct ip_set_req_iptreemap *) data;
5793 +
5794 +       if (size != sizeof(struct ip_set_req_iptreemap)) {
5795 +               ip_set_printk("data length wrong (want %zu, have %zu)", sizeof(struct ip_set_req_iptreemap), size);
5796 +               return -EINVAL;
5797 +       }
5798 +
5799 +       return __delip_range(set, MIN(req->start, req->end), MAX(req->start, req->end), hash_ip, GFP_KERNEL);
5800 +}
5801 +
5802 +static int
5803 +delip_kernel(struct ip_set *set, const struct sk_buff *skb, ip_set_ip_t *hash_ip, const u_int32_t *flags, unsigned char index)
5804 +{
5805 +       return __delip_single(set, 
5806 +                       ntohl(flags[index] & IPSET_SRC 
5807 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
5808 +                               ? ip_hdr(skb)->saddr 
5809 +                               : ip_hdr(skb)->daddr),
5810 +#else
5811 +                               ? skb->nh.iph->saddr 
5812 +                               : skb->nh.iph->daddr),
5813 +#endif
5814 +                       hash_ip,
5815 +                       GFP_ATOMIC);
5816 +}
5817 +
5818 +/* Check the status of the bitmap
5819 + * -1 == all bits cleared
5820 + *  1 == all bits set
5821 + *  0 == anything else
5822 + */
5823 +static inline int
5824 +bitmap_status(struct ip_set_iptreemap_d *dtree)
5825 +{
5826 +       unsigned char first = dtree->bitmap[0];
5827 +       int a;
5828 +
5829 +       for (a = 1; a < 32; a++)
5830 +               if (dtree->bitmap[a] != first)
5831 +                       return 0;
5832 +
5833 +       return (first == 0 ? -1 : (first == 255 ? 1 : 0));
5834 +}
5835 +
5836 +static void
5837 +gc(unsigned long addr)
5838 +{
5839 +       struct ip_set *set = (struct ip_set *) addr;
5840 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5841 +       struct ip_set_iptreemap_b *btree;
5842 +       struct ip_set_iptreemap_c *ctree;
5843 +       struct ip_set_iptreemap_d *dtree;
5844 +       unsigned int a, b, c;
5845 +       int i, j, k;
5846 +       /* Timer callback: addr is the struct ip_set pointer that init_gc_timer() stored in gc.data. */
5847 +       write_lock_bh(&set->lock);
5848 +       /* NOTE(review): i/j/k are handed to the LOOP_WALK_*_GC macros (defined outside this section); presumably they tally empty/full children - confirm there. */
5849 +       LOOP_WALK_BEGIN_GC(map, a, btree, fullbitmap_b, cachep_b, i) {
5850 +               LOOP_WALK_BEGIN_GC(btree, b, ctree, fullbitmap_c, cachep_c, j) {
5851 +                       if (!test_and_clear_bit(b, (void *) btree->dirty)) /* only ctrees flagged dirty are examined; the flag is cleared here */
5852 +                               continue;
5853 +                       LOOP_WALK_BEGIN_GC(ctree, c, dtree, fullbitmap_d, cachep_d, k) {
5854 +                               switch (bitmap_status(dtree)) {
5855 +                                       case -1: /* leaf is empty: free it and leave the slot NULL */
5856 +                                               kmem_cache_free(cachep_d, dtree);
5857 +                                               ctree->tree[c] = NULL;
5858 +                                               k--;
5859 +                                       break;
5860 +                                       case 1: /* leaf is full: free it and point the slot at the shared fullbitmap_d */
5861 +                                               kmem_cache_free(cachep_d, dtree);
5862 +                                               ctree->tree[c] = fullbitmap_d;
5863 +                                               k++;
5864 +                                       break;
5865 +                               }
5866 +                       } LOOP_WALK_END();
5867 +               } LOOP_WALK_END_GC(btree, b, ctree, fullbitmap_c, cachep_c, k);
5868 +       } LOOP_WALK_END_GC(map, a, btree, fullbitmap_b, cachep_b, j);
5869 +
5870 +       write_unlock_bh(&set->lock);
5871 +       /* Re-arm the timer so that gc() runs again after gc_interval * HZ jiffies. */
5872 +       map->gc.expires = jiffies + map->gc_interval * HZ;
5873 +       add_timer(&map->gc);
5874 +}
5875 +
5876 +static inline void init_gc_timer(struct ip_set *set)
5877 +{
5878 +       struct ip_set_iptreemap *treemap = (struct ip_set_iptreemap *) set->data;
5879 +       struct timer_list *timer = &treemap->gc;
5880 +       /* Arm the timer to call gc() with @set as argument after gc_interval * HZ jiffies. */
5881 +       init_timer(timer);
5882 +       timer->function = gc;
5883 +       timer->data = (unsigned long) set;
5884 +       timer->expires = jiffies + treemap->gc_interval * HZ;
5885 +       add_timer(timer);
5886 +}
5887 +
5888 +static int create(struct ip_set *set, const void *data, size_t size)
5889 +{
5890 +       const struct ip_set_req_iptreemap_create *params = data;
5891 +       struct ip_set_iptreemap *treemap;
5892 +
5893 +       /* Set constructor: validate the request, allocate the root, start gc. */
5894 +       if (size != sizeof(*params)) {
5895 +               ip_set_printk("data length wrong (want %zu, have %zu)", sizeof(*params), size);
5896 +               return -EINVAL;
5897 +       }
5898 +       treemap = kzalloc(sizeof(*treemap), GFP_KERNEL);
5899 +       if (treemap == NULL)
5900 +               return -ENOMEM;
5901 +       /* An interval of zero in the request selects the built-in default. */
5902 +       treemap->gc_interval = IPTREEMAP_DEFAULT_GC_TIME;
5903 +       if (params->gc_interval)
5904 +               treemap->gc_interval = params->gc_interval;
5905 +       set->data = treemap;
5906 +       init_gc_timer(set);
5907 +       return 0;
5908 +}
5909 +
5910 +static inline void __flush(struct ip_set_iptreemap *map)
5911 +{
5912 +       struct ip_set_iptreemap_b *btree;
5913 +       unsigned int a;
5914 +       /* Free each subtree the walk yields, except the module-wide shared fullbitmap_b node. */
5915 +       LOOP_WALK_BEGIN(map, a, btree);
5916 +               if (btree != fullbitmap_b)
5917 +                       free_b(btree); /* free_b() is defined outside this section; no pointer is reset here - callers wipe or free the map afterwards */
5918 +       LOOP_WALK_END();
5919 +}
5920 +
5921 +static void destroy(struct ip_set *set)
5922 +{
5923 +       struct ip_set_iptreemap *treemap = (struct ip_set_iptreemap *) set->data;
5924 +
5925 +       /* Sleep and retry until del_timer() reports that it removed the gc timer. */
5926 +       while (del_timer(&treemap->gc) == 0)
5927 +               msleep(IPTREEMAP_DESTROY_SLEEP);
5928 +       /* With gc stopped, release the tree and then the root itself. */
5929 +       __flush(treemap);
5930 +       set->data = NULL;
5931 +       kfree(treemap);
5932 +}
5933 +
5934 +static void flush(struct ip_set *set)
5935 +{
5936 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5937 +       unsigned int gc_interval = map->gc_interval;    /* must survive the wipe below */
5938 +       /* Empty the set but keep it usable: stop gc, free the tree, restart gc. */
5939 +       while (!del_timer(&map->gc))
5940 +               msleep(IPTREEMAP_DESTROY_SLEEP);
5941 +       __flush(map);
5942 +       /* memset() also clears gc_interval; restore it, or the timer would be re-armed with a zero delay. */
5943 +       memset(map, 0, sizeof(*map));
5944 +       map->gc_interval = gc_interval;
5945 +       init_gc_timer(set);
5946 +}
5947 +
5948 +static void list_header(const struct ip_set *set, void *data)
5949 +{
5950 +       const struct ip_set_iptreemap *treemap = (const struct ip_set_iptreemap *) set->data;
5951 +       struct ip_set_req_iptreemap_create *out = data;
5952 +       /* The only create-time parameter reported back is the gc interval. */
5953 +       out->gc_interval = treemap->gc_interval;
5954 +}
5955 +
5956 +static int list_members_size(const struct ip_set *set)
5957 +{
5958 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5959 +       struct ip_set_iptreemap_b *btree;
5960 +       struct ip_set_iptreemap_c *ctree;
5961 +       struct ip_set_iptreemap_d *dtree;
5962 +       unsigned int a, b, c, d, inrange = 0, count = 0;
5963 +       /* Counts runs of consecutive set bits and returns that count times sizeof(struct ip_set_req_iptreemap). */
5964 +       LOOP_WALK_BEGIN_COUNT(map, a, btree, inrange, count) { /* NOTE(review): the macro also receives inrange/count; its handling of absent subtrees is defined outside this section */
5965 +               LOOP_WALK_BEGIN_COUNT(btree, b, ctree, inrange, count) {
5966 +                       LOOP_WALK_BEGIN_COUNT(ctree, c, dtree, inrange, count) {
5967 +                               for (d = 0; d < 256; d++) {
5968 +                                       if (test_bit(d, (void *) dtree->bitmap)) {
5969 +                                               inrange = 1; /* inside a run of set bits */
5970 +                                       } else if (inrange) {
5971 +                                               count++; /* a cleared bit ends the run */
5972 +                                               inrange = 0;
5973 +                                       }
5974 +                               }
5975 +                       } LOOP_WALK_END_COUNT();
5976 +               } LOOP_WALK_END_COUNT();
5977 +       } LOOP_WALK_END_COUNT();
5978 +       /* A run still open when the walk ends counts as one more range. */
5979 +       if (inrange)
5980 +               count++;
5981 +
5982 +       return (count * sizeof(struct ip_set_req_iptreemap));
5983 +}
5984 +
5985 +static inline size_t add_member(void *data, size_t offset, ip_set_ip_t start, ip_set_ip_t end)
5986 +{
5987 +       struct ip_set_req_iptreemap *slot = data + offset;
5988 +
5989 +       /* Store one start/end pair at byte @offset of the output buffer. */
5990 +       slot->end = end;
5991 +       slot->start = start;
5992 +       return sizeof(struct ip_set_req_iptreemap);     /* bytes written */
5993 +}
5994 +
5995 +static void list_members(const struct ip_set *set, void *data)
5996 +{
5997 +       struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data;
5998 +       struct ip_set_iptreemap_b *btree;
5999 +       struct ip_set_iptreemap_c *ctree;
6000 +       struct ip_set_iptreemap_d *dtree;
6001 +       unsigned int a, b, c, d, inrange = 0;
6002 +       size_t offset = 0;
6003 +       ip_set_ip_t start = 0, end = 0, ip;
6004 +       /* Writes one start/end entry into data (via add_member) for each run of consecutive member addresses. */
6005 +       LOOP_WALK_BEGIN(map, a, btree) {
6006 +               LOOP_WALK_BEGIN(btree, b, ctree) {
6007 +                       LOOP_WALK_BEGIN(ctree, c, dtree) {
6008 +                               for (d = 0; d < 256; d++) {
6009 +                                       if (test_bit(d, (void *) dtree->bitmap)) {
6010 +                                               ip = ((a << 24) | (b << 16) | (c << 8) | d); /* address rebuilt from the four tree indices */
6011 +                                               if (!inrange) {
6012 +                                                       inrange = 1;
6013 +                                                       start = ip;
6014 +                                               } else if (end < ip - 1) { /* gap since the previous member: emit that range, open a new one */
6015 +                                                       offset += add_member(data, offset, start, end);
6016 +                                                       start = ip;
6017 +                                               }
6018 +                                               end = ip;
6019 +                                       } else if (inrange) { /* a cleared bit ends the current range */
6020 +                                               offset += add_member(data, offset, start, end);
6021 +                                               inrange = 0;
6022 +                                       }
6023 +                               }
6024 +                       } LOOP_WALK_END();
6025 +               } LOOP_WALK_END();
6026 +       } LOOP_WALK_END();
6027 +       /* Emit the range that was still open when the walk ended. */
6028 +       if (inrange)
6029 +               add_member(data, offset, start, end);
6030 +}
6031 +
6032 +static struct ip_set_type ip_set_iptreemap = {
6033 +       .typename               = SETTYPE_NAME,         /* identity of the type */
6034 +       .protocol_version       = IP_SET_PROTOCOL_VERSION,
6035 +       .features               = IPSET_TYPE_IP | IPSET_DATA_SINGLE,
6036 +       .me                     = THIS_MODULE,
6037 +       .create                 = create,               /* set lifetime */
6038 +       .flush                  = flush,
6039 +       .destroy                = destroy,
6040 +       .reqsize                = sizeof(struct ip_set_req_iptreemap),  /* per-address requests */
6041 +       .testip                 = testip,
6042 +       .testip_kernel          = testip_kernel,
6043 +       .addip                  = addip,
6044 +       .addip_kernel           = addip_kernel,
6045 +       .delip                  = delip,
6046 +       .delip_kernel           = delip_kernel,
6047 +       .header_size            = sizeof(struct ip_set_req_iptreemap_create),   /* listing */
6048 +       .list_header            = list_header,
6049 +       .list_members_size      = list_members_size,
6050 +       .list_members           = list_members,
6051 +};
6052 +
6053 +MODULE_LICENSE("GPL");
6054 +MODULE_AUTHOR("Sven Wegener <sven.wegener@stealer.net>");
6055 +MODULE_DESCRIPTION("iptreemap type of IP sets");
6056 +
6057 +static int __init ip_set_iptreemap_init(void)
6058 +{
6059 +       int ret = -ENOMEM;
6060 +       int a;
6061 +       /* Module init: create the three node caches and the shared "full" nodes, then register the type. */
6062 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
6063 +       cachep_b = kmem_cache_create("ip_set_iptreemap_b",
6064 +                                    sizeof(struct ip_set_iptreemap_b),
6065 +                                    0, 0, NULL);
6066 +#else
6067 +       cachep_b = kmem_cache_create("ip_set_iptreemap_b",
6068 +                                    sizeof(struct ip_set_iptreemap_b),
6069 +                                    0, 0, NULL, NULL);
6070 +#endif
6071 +       if (!cachep_b) {
6072 +               ip_set_printk("Unable to create ip_set_iptreemap_b slab cache");
6073 +               goto out;
6074 +       }
6075 +
6076 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
6077 +       cachep_c = kmem_cache_create("ip_set_iptreemap_c",
6078 +                                    sizeof(struct ip_set_iptreemap_c),
6079 +                                    0, 0, NULL);
6080 +#else
6081 +       cachep_c = kmem_cache_create("ip_set_iptreemap_c",
6082 +                                    sizeof(struct ip_set_iptreemap_c),
6083 +                                    0, 0, NULL, NULL);
6084 +#endif
6085 +       if (!cachep_c) {
6086 +               ip_set_printk("Unable to create ip_set_iptreemap_c slab cache");
6087 +               goto outb;
6088 +       }
6089 +
6090 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
6091 +       cachep_d = kmem_cache_create("ip_set_iptreemap_d",
6092 +                                    sizeof(struct ip_set_iptreemap_d),
6093 +                                    0, 0, NULL);
6094 +#else
6095 +       cachep_d = kmem_cache_create("ip_set_iptreemap_d",
6096 +                                    sizeof(struct ip_set_iptreemap_d),
6097 +                                    0, 0, NULL, NULL);
6098 +#endif
6099 +       if (!cachep_d) {
6100 +               ip_set_printk("Unable to create ip_set_iptreemap_d slab cache");
6101 +               goto outc;
6102 +       }
6103 +
6104 +       fullbitmap_d = kmem_cache_alloc(cachep_d, GFP_KERNEL);
6105 +       if (!fullbitmap_d)
6106 +               goto outd;
6107 +
6108 +       fullbitmap_c = kmem_cache_alloc(cachep_c, GFP_KERNEL);
6109 +       if (!fullbitmap_c)
6110 +               goto outbitmapd;
6111 +
6112 +       fullbitmap_b = kmem_cache_alloc(cachep_b, GFP_KERNEL);
6113 +       if (!fullbitmap_b)
6114 +               goto outbitmapc;
6115 +
6116 +       /* Fill in the shared nodes BEFORE registering the type, so that a
6117 +        * registered type never exposes uninitialised fullbitmap_* nodes. */
6118 +       memset(fullbitmap_d->bitmap, 0xff, sizeof(fullbitmap_d->bitmap));
6119 +
6120 +       for (a = 0; a < 256; a++)
6121 +               fullbitmap_c->tree[a] = fullbitmap_d;
6122 +       for (a = 0; a < 256; a++)
6123 +               fullbitmap_b->tree[a] = fullbitmap_c;
6124 +       memset(fullbitmap_b->dirty, 0, sizeof(fullbitmap_b->dirty));
6125 +
6126 +       ret = ip_set_register_set_type(&ip_set_iptreemap);
6127 +       if (0 > ret)
6128 +               goto outbitmapb;
6129 +
6130 +       return 0;
6131 +
6132 +outbitmapb:
6133 +       kmem_cache_free(cachep_b, fullbitmap_b);
6134 +outbitmapc:
6135 +       kmem_cache_free(cachep_c, fullbitmap_c);
6136 +outbitmapd:
6137 +       kmem_cache_free(cachep_d, fullbitmap_d);
6138 +outd:
6139 +       kmem_cache_destroy(cachep_d);
6140 +outc:
6141 +       kmem_cache_destroy(cachep_c);
6142 +outb:
6143 +       kmem_cache_destroy(cachep_b);
6144 +out:
6145 +       /* ret is -ENOMEM for allocation failures, or the registration error. */
6146 +       return ret;
6147 +}
6148 +
6149 +static void __exit ip_set_iptreemap_fini(void)
6150 +{
6151 +       ip_set_unregister_set_type(&ip_set_iptreemap); /* unhook the type before freeing what it uses */
6152 +       kmem_cache_free(cachep_b, fullbitmap_b);        /* then, per cache: shared node, cache */
6153 +       kmem_cache_destroy(cachep_b);
6154 +       kmem_cache_free(cachep_c, fullbitmap_c);
6155 +       kmem_cache_destroy(cachep_c);
6156 +       kmem_cache_free(cachep_d, fullbitmap_d);
6157 +       kmem_cache_destroy(cachep_d);
6158 +}
6159 +
6160 +module_init(ip_set_iptreemap_init);
6161 +module_exit(ip_set_iptreemap_fini);
6162 diff --git a/net/ipv4/netfilter/ip_set_macipmap.c b/net/ipv4/netfilter/ip_set_macipmap.c
6163 new file mode 100644
6164 index 0000000..8ca2159
6165 --- /dev/null
6166 +++ b/net/ipv4/netfilter/ip_set_macipmap.c
6167 @@ -0,0 +1,375 @@
6168 +/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
6169 + *                         Patrick Schaaf <bof@bof.de>
6170 + *                         Martin Josefsson <gandalf@wlug.westbo.se>
6171 + * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
6172 + *
6173 + * This program is free software; you can redistribute it and/or modify
6174 + * it under the terms of the GNU General Public License version 2 as
6175 + * published by the Free Software Foundation.  
6176 + */
6177 +
6178 +/* Kernel module implementing an IP set type: the macipmap type */
6179 +
6180 +#include <linux/module.h>
6181 +#include <linux/ip.h>
6182 +#include <linux/skbuff.h>
6183 +#include <linux/version.h>
6184 +#include <linux/netfilter_ipv4/ip_tables.h>
6185 +#include <linux/netfilter_ipv4/ip_set.h>
6186 +#include <linux/errno.h>
6187 +#include <asm/uaccess.h>
6188 +#include <asm/bitops.h>
6189 +#include <linux/spinlock.h>
6190 +#include <linux/if_ether.h>
6191 +#include <linux/vmalloc.h>
6192 +
6193 +#include <linux/netfilter_ipv4/ip_set_malloc.h>
6194 +#include <linux/netfilter_ipv4/ip_set_macipmap.h>
6195 +
6196 +static int
6197 +testip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip)
6198 +{
6199 +       struct ip_set_macipmap *map = (struct ip_set_macipmap *) set->data;
6200 +       struct ip_set_macip *table = (struct ip_set_macip *) map->members;      
6201 +       struct ip_set_req_macipmap *req = (struct ip_set_req_macipmap *) data;
6202 +
6203 +       if (size != sizeof(struct ip_set_req_macipmap)) {
6204 +               ip_set_printk("data length wrong (want %zu, have %zu)",
6205 +                             sizeof(struct ip_set_req_macipmap),
6206 +                             size);
6207 +               return -EINVAL;
6208 +       }
6209 +
6210 +       if (req->ip < map->first_ip || req->ip > map->last_ip)
6211 +               return -ERANGE;
6212 +
6213 +       *hash_ip = req->ip;
6214 +       DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u",
6215 +          set->name, HIPQUAD(req->ip), HIPQUAD(*hash_ip));             
6216 +       if (test_bit(IPSET_MACIP_ISSET,
6217 +                    (void *) &table[req->ip - map->first_ip].flags)) {
6218 +               return (memcmp(req->ethernet,
6219 +                              &table[req->ip - map->first_ip].ethernet,
6220 +                              ETH_ALEN) == 0);
6221 +       } else {
6222 +               return (map->flags & IPSET_MACIP_MATCHUNSET ? 1 : 0);
6223 +       }
6224 +}
6225 +
6226 +static int
6227 +testip_kernel(struct ip_set *set, 
6228 +             const struct sk_buff *skb,
6229 +             ip_set_ip_t *hash_ip,
6230 +             const u_int32_t *flags,
6231 +             unsigned char index)
6232 +{
6233 +       struct ip_set_macipmap *map =
6234 +           (struct ip_set_macipmap *) set->data;
6235 +       struct ip_set_macip *table =
6236 +           (struct ip_set_macip *) map->members;
6237 +       ip_set_ip_t ip;
6238 +       /* Packet-path test: take the source (IPSET_SRC) or destination address of skb, in host byte order. */
6239 +       ip = ntohl(flags[index] & IPSET_SRC
6240 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
6241 +                       ? ip_hdr(skb)->saddr 
6242 +                       : ip_hdr(skb)->daddr);
6243 +#else
6244 +                       ? skb->nh.iph->saddr
6245 +                       : skb->nh.iph->daddr);
6246 +#endif
6247 +       /* Out-of-range addresses simply do not match here; the userspace testip() returns -ERANGE instead. */
6248 +       if (ip < map->first_ip || ip > map->last_ip)
6249 +               return 0;
6250 +
6251 +       *hash_ip = ip;  
6252 +       DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u",
6253 +          set->name, HIPQUAD(ip), HIPQUAD(*hash_ip));          
6254 +       if (test_bit(IPSET_MACIP_ISSET,
6255 +           (void *) &table[ip - map->first_ip].flags)) {
6256 +               /* Is mac pointer valid (ETH_HLEN bytes lying between skb->head and skb->data)?
6257 +                * If so, compare the frame's source MAC with the stored one. */
6258 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
6259 +               return (skb_mac_header(skb) >= skb->head
6260 +                       && (skb_mac_header(skb) + ETH_HLEN) <= skb->data
6261 +#else
6262 +               return (skb->mac.raw >= skb->head
6263 +                       && (skb->mac.raw + ETH_HLEN) <= skb->data
6264 +#endif
6265 +                       && (memcmp(eth_hdr(skb)->h_source,
6266 +                                  &table[ip - map->first_ip].ethernet,
6267 +                                  ETH_ALEN) == 0));
6268 +       } else {
6269 +               return (map->flags & IPSET_MACIP_MATCHUNSET ? 1 : 0); /* no MAC stored: match only if IPSET_MACIP_MATCHUNSET is set */
6270 +       }
6271 +}
6272 +
6273 +/* Record @ethernet as the MAC of @ip. Returns 0 on success, -ERANGE when
6274 + * @ip lies outside the map and -EEXIST when the address is already set. */
6275 +static inline int
6276 +__addip(struct ip_set *set,
6277 +       ip_set_ip_t ip, unsigned char *ethernet, ip_set_ip_t *hash_ip)
6278 +{
6279 +       struct ip_set_macipmap *map = (struct ip_set_macipmap *) set->data;
6280 +       struct ip_set_macip *slot;
6281 +
6282 +       if (ip < map->first_ip || ip > map->last_ip)
6283 +               return -ERANGE;
6284 +       slot = (struct ip_set_macip *) map->members + (ip - map->first_ip);
6285 +       /* test_and_set_bit() both checks and claims the slot; an old value of 1 means a duplicate. */
6286 +       if (test_and_set_bit(IPSET_MACIP_ISSET, (void *) &slot->flags))
6287 +               return -EEXIST;
6288 +
6289 +       memcpy(&slot->ethernet, ethernet, ETH_ALEN);
6290 +       *hash_ip = ip;
6291 +       DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip));
6292 +       return 0;
6293 +}
6294 +
6295 +/* Userspace add: store the (ip, MAC) pair carried by one request. */
6296 +static int
6297 +addip(struct ip_set *set, const void *data, size_t size,
6298 +      ip_set_ip_t *hash_ip)
6299 +{
6300 +       /* __addip() takes a non-const MAC pointer, hence the cast. */
6301 +       struct ip_set_req_macipmap *req = (struct ip_set_req_macipmap *) data;
6302 +
6303 +       if (size == sizeof(*req))
6304 +               return __addip(set, req->ip, req->ethernet, hash_ip);
6305 +
6306 +       ip_set_printk("data length wrong (want %zu, have %zu)",
6307 +                     sizeof(*req), size);
6308 +       return -EINVAL;
6309 +}
6310 +
6311 +static int
6312 +addip_kernel(struct ip_set *set, 
6313 +            const struct sk_buff *skb,
6314 +            ip_set_ip_t *hash_ip,
6315 +            const u_int32_t *flags,
6316 +            unsigned char index)
6317 +{
6318 +       ip_set_ip_t ip;
6319 +       /* Packet-path add: take the source (IPSET_SRC) or destination address of skb, in host byte order. */
6320 +       ip = ntohl(flags[index] & IPSET_SRC
6321 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
6322 +                       ? ip_hdr(skb)->saddr 
6323 +                       : ip_hdr(skb)->daddr);
6324 +#else
6325 +                       ? skb->nh.iph->saddr
6326 +                       : skb->nh.iph->daddr);
6327 +#endif
6328 +       /* Refuse the packet unless ETH_HLEN bytes of MAC header lie between skb->head and skb->data. */
6329 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
6330 +       if (!(skb_mac_header(skb) >= skb->head
6331 +             && (skb_mac_header(skb) + ETH_HLEN) <= skb->data))
6332 +#else
6333 +       if (!(skb->mac.raw >= skb->head
6334 +             && (skb->mac.raw + ETH_HLEN) <= skb->data))
6335 +#endif
6336 +               return -EINVAL;
6337 +       /* The stored MAC is always the frame's source MAC, whichever IP address was selected above. */
6338 +       return __addip(set, ip, eth_hdr(skb)->h_source, hash_ip);
6339 +}
6340 +
6341 +/* Clear the entry of @ip: 0 on success, -ERANGE if out of range, -EEXIST if it was not set. */
6342 +static inline int
6343 +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
6344 +{
6345 +       struct ip_set_macipmap *map = (struct ip_set_macipmap *) set->data;
6346 +       struct ip_set_macip *slot;
6347 +
6348 +       if (ip < map->first_ip || ip > map->last_ip)
6349 +               return -ERANGE;
6350 +
6351 +       slot = (struct ip_set_macip *) map->members + (ip - map->first_ip);
6352 +       if (test_and_clear_bit(IPSET_MACIP_ISSET, (void *) &slot->flags) == 0)
6353 +               return -EEXIST;
6354 +
6355 +       *hash_ip = ip;
6356 +       DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip));
6357 +       return 0;
6358 +}
6359 +
6360 +static int
6361 +delip(struct ip_set *set, const void *data, size_t size,
6362 +     ip_set_ip_t *hash_ip)
6363 +{
6364 +       struct ip_set_req_macipmap *req =
6365 +           (struct ip_set_req_macipmap *) data;
6366 +
6367 +       if (size != sizeof(struct ip_set_req_macipmap)) {
6368 +               ip_set_printk("data length wrong (want %zu, have %zu)",
6369 +                             sizeof(struct ip_set_req_macipmap),
6370 +                             size);
6371 +               return -EINVAL;
6372 +       }
6373 +  &