

Add radix-tree tagging so we can look up dirty or writeback pages in
O(log64(n)) time.

-ECRAPPYCHANGELOG.


---

 include/linux/radix-tree.h |   61 +++++-
 lib/radix-tree.c           |  441 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 436 insertions(+), 66 deletions(-)

diff -puN include/linux/radix-tree.h~radix-tree-tagging include/linux/radix-tree.h
--- 25/include/linux/radix-tree.h~radix-tree-tagging	2004-03-10 10:12:07.000000000 -0800
+++ 25-akpm/include/linux/radix-tree.h	2004-03-10 10:12:07.000000000 -0800
@@ -20,8 +20,32 @@
 #define _LINUX_RADIX_TREE_H
 
 #include <linux/preempt.h>
+#include <linux/types.h>
 
-struct radix_tree_node;
+/*
+ * Radix tree node definition.
+ *
+ * RADIX_TREE_MAP_SHIFT must be >= log2(BITS_PER_LONG).  Otherwise the tags
+ * array will have zero size and the set_tag() arithmetic will go wrong.
+ */
+#ifdef __KERNEL__
+#define RADIX_TREE_MAP_SHIFT	6
+#else
+#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
+#endif
+#define RADIX_TREE_TAGS		2
+
+#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)
+
+#define RADIX_TREE_TAG_LONGS	\
+	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+struct radix_tree_node {
+	unsigned int	count;
+	void		*slots[RADIX_TREE_MAP_SIZE];
+	unsigned long	tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
+};
 
 struct radix_tree_root {
 	unsigned int		height;
@@ -29,25 +53,40 @@ struct radix_tree_root {
 	struct radix_tree_node	*rnode;
 };
 
-#define RADIX_TREE_INIT(mask)	{0, (mask), NULL}
+#define RADIX_TREE_INIT(mask)	{					\
+	.height = 0,							\
+	.gfp_mask = (mask),						\
+	.rnode = NULL,							\
+}
 
 #define RADIX_TREE(name, mask) \
 	struct radix_tree_root name = RADIX_TREE_INIT(mask)
 
-#define INIT_RADIX_TREE(root, mask)	\
-do {					\
-	(root)->height = 0;		\
-	(root)->gfp_mask = (mask);	\
-	(root)->rnode = NULL;		\
+#define INIT_RADIX_TREE(root, mask)					\
+do {									\
+	(root)->height = 0;						\
+	(root)->gfp_mask = (mask);					\
+	(root)->rnode = NULL;						\
 } while (0)
 
-extern int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
-extern void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
-extern void *radix_tree_delete(struct radix_tree_root *, unsigned long);
-extern unsigned int
+int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
+void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
+void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			unsigned long first_index, unsigned int max_items);
 int radix_tree_preload(int gfp_mask);
+void radix_tree_init(void);
+void *radix_tree_tag_set(struct radix_tree_root *root,
+			unsigned long index, int tag);
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+			unsigned long index, int tag);
+int radix_tree_tag_get(struct radix_tree_root *root,
+			unsigned long index, int tag);
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+		unsigned long first_index, unsigned int max_items, int tag);
+int radix_tree_tagged(struct radix_tree_root *root, int tag);
 
 static inline void radix_tree_preload_end(void)
 {
diff -puN lib/radix-tree.c~radix-tree-tagging lib/radix-tree.c
--- 25/lib/radix-tree.c~radix-tree-tagging	2004-03-10 10:12:07.000000000 -0800
+++ 25-akpm/lib/radix-tree.c	2004-03-10 10:51:14.000000000 -0800
@@ -6,12 +6,12 @@
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2, or (at
  * your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
@@ -26,21 +26,11 @@
 #include <linux/slab.h>
 #include <linux/gfp.h>
 #include <linux/string.h>
-
-/*
- * Radix tree node definition.
- */
-#define RADIX_TREE_MAP_SHIFT  6
-#define RADIX_TREE_MAP_SIZE  (1UL << RADIX_TREE_MAP_SHIFT)
-#define RADIX_TREE_MAP_MASK  (RADIX_TREE_MAP_SIZE-1)
-
-struct radix_tree_node {
-	unsigned int	count;
-	void		*slots[RADIX_TREE_MAP_SIZE];
-};
+#include <linux/bitops.h>
 
 struct radix_tree_path {
 	struct radix_tree_node *node, **slot;
+	int offset;
 };
 
 #define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
@@ -86,7 +76,7 @@ radix_tree_node_alloc(struct radix_tree_
 }
 
 static inline void
-radix_tree_node_free(struct radix_tree_node *node)
+radix_tree_node_free(struct radix_tree_root *root, struct radix_tree_node *node)
 {
 	kmem_cache_free(radix_tree_node_cachep, node);
 }
@@ -122,6 +112,22 @@ out:
 	return ret;
 }
 
+static inline void tag_set(struct radix_tree_node *node, int tag, int offset)
+{
+	if (!test_bit(offset, &node->tags[tag][0]))
+		__set_bit(offset, &node->tags[tag][0]);
+}
+
+static inline void tag_clear(struct radix_tree_node *node, int tag, int offset)
+{
+	__clear_bit(offset, &node->tags[tag][0]);
+}
+
+static inline int tag_get(struct radix_tree_node *node, int tag, int offset)
+{
+	return test_bit(offset, &node->tags[tag][0]);
+}
+
 /*
  *	Return the maximum key which can be store into a
  *	radix tree with height HEIGHT.
@@ -138,26 +144,53 @@ static int radix_tree_extend(struct radi
 {
 	struct radix_tree_node *node;
 	unsigned int height;
+	char tags[RADIX_TREE_TAGS];
+	int tag;
 
 	/* Figure out what the height should be.  */
 	height = root->height + 1;
 	while (index > radix_tree_maxindex(height))
 		height++;
 
-	if (root->rnode) {
-		do {
-			if (!(node = radix_tree_node_alloc(root)))
-				return -ENOMEM;
-
-			/* Increase the height.  */
-			node->slots[0] = root->rnode;
-			node->count = 1;
-			root->rnode = node;
-			root->height++;
-		} while (height > root->height);
-	} else 
+	if (root->rnode == NULL) {
 		root->height = height;
+		goto out;
+	}
 
+	/*
+	 * Prepare the tag status of the top-level node for propagation
+	 * into the newly-pushed top-level node(s)
+	 */
+	for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
+		int idx;
+
+		tags[tag] = 0;
+		for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+			if (root->rnode->tags[tag][idx]) {
+				tags[tag] = 1;
+				break;
+			}
+		}
+	}
+
+	do {
+		if (!(node = radix_tree_node_alloc(root)))
+			return -ENOMEM;
+
+		/* Increase the height.  */
+		node->slots[0] = root->rnode;
+
+		/* Propagate the aggregated tag info into the new root */
+		for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
+			if (tags[tag])
+				tag_set(node, tag, 0);
+		}
+
+		node->count = 1;
+		root->rnode = node;
+		root->height++;
+	} while (height > root->height);
+out:
 	return 0;
 }
 
@@ -169,23 +202,27 @@ static int radix_tree_extend(struct radi
  *
  *	Insert an item into the radix tree at position @index.
  */
-int radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item)
+int radix_tree_insert(struct radix_tree_root *root,
+			unsigned long index, void *item)
 {
 	struct radix_tree_node *node = NULL, *tmp, **slot;
 	unsigned int height, shift;
+	int offset;
 	int error;
 
 	/* Make sure the tree is high enough.  */
-	if (index > radix_tree_maxindex(root->height)) {
+	if ((!index && !root->rnode) ||
+			index > radix_tree_maxindex(root->height)) {
 		error = radix_tree_extend(root, index);
 		if (error)
 			return error;
 	}
-    
+
 	slot = &root->rnode;
 	height = root->height;
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 
+	offset = 0;			/* uninitialised var warning */
 	while (height > 0) {
 		if (*slot == NULL) {
 			/* Have to add a child node.  */
@@ -196,18 +233,21 @@ int radix_tree_insert(struct radix_tree_
 				node->count++;
 		}
 
-		/* Go a level down.  */
+		/* Go a level down */
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 		node = *slot;
-		slot = (struct radix_tree_node **)
-			(node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK));
+		slot = (struct radix_tree_node **)(node->slots + offset);
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 
 	if (*slot != NULL)
 		return -EEXIST;
-	if (node)
+	if (node) {
 		node->count++;
+		BUG_ON(tag_get(node, 0, offset));
+		BUG_ON(tag_get(node, 1, offset));
+	}
 
 	*slot = item;
 	return 0;
@@ -219,7 +259,7 @@ EXPORT_SYMBOL(radix_tree_insert);
  *	@root:		radix tree root
  *	@index:		index key
  *
- *	Lookup them item at the position @index in the radix tree @root.
+ *	Lookup the item at the position @index in the radix tree @root.
  */
 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 {
@@ -238,16 +278,174 @@ void *radix_tree_lookup(struct radix_tre
 			return NULL;
 
 		slot = (struct radix_tree_node **)
-			((*slot)->slots + ((index >> shift) & RADIX_TREE_MAP_MASK));
+			((*slot)->slots +
+				((index >> shift) & RADIX_TREE_MAP_MASK));
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 
-	return (void *) *slot;
+	return *slot;
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
-static /* inline */ unsigned int
+/**
+ *	radix_tree_tag_set - set a tag on a radix tree node
+ *	@root:		radix tree root
+ *	@index:		index key
+ *	@tag: 		tag index
+ *
+ *	Set the search tag corresponging to @index in the radix tree.  From
+ *	the root all the way down to the leaf node.
+ *
+ *	Returns the address of the tagged item.   Setting a tag on a not-present
+ *	item is a bug.
+ */
+void *radix_tree_tag_set(struct radix_tree_root *root,
+			unsigned long index, int tag)
+{
+	unsigned int height, shift;
+	struct radix_tree_node **slot;
+
+	height = root->height;
+	if (index > radix_tree_maxindex(height))
+		return NULL;
+
+	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+	slot = &root->rnode;
+
+	while (height > 0) {
+		int offset;
+
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		tag_set(*slot, tag, offset);
+		slot = (struct radix_tree_node **)((*slot)->slots + offset);
+		BUG_ON(*slot == NULL);
+		shift -= RADIX_TREE_MAP_SHIFT;
+		height--;
+	}
+
+	return *slot;
+}
+EXPORT_SYMBOL(radix_tree_tag_set);
+
+/**
+ *	radix_tree_tag_clear - clear a tag on a radix tree node
+ *	@root:		radix tree root
+ *	@index:		index key
+ *	@tag: 		tag index
+ *
+ *	Clear the search tag corresponging to @index in the radix tree.  If
+ *	this causes the leaf node to have no tags set then clear the tag in the
+ *	next-to-leaf node, etc.
+ *
+ *	Returns the address of the tagged item on success, else NULL.  ie:
+ *	has the same return value and semantics as radix_tree_lookup().
+ */
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+			unsigned long index, int tag)
+{
+	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+	unsigned int height, shift;
+	void *ret = NULL;
+
+	height = root->height;
+	if (index > radix_tree_maxindex(height))
+		goto out;
+
+	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+	pathp->node = NULL;
+	pathp->slot = &root->rnode;
+
+	while (height > 0) {
+		int offset;
+
+		if (*pathp->slot == NULL)
+			goto out;
+
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		pathp[1].offset = offset;
+		pathp[1].node = *pathp[0].slot;
+		pathp[1].slot = (struct radix_tree_node **)
+				(pathp[1].node->slots + offset);
+		pathp++;
+		shift -= RADIX_TREE_MAP_SHIFT;
+		height--;
+	}
+
+	ret = *pathp[0].slot;
+	if (ret == NULL)
+		goto out;
+
+	do {
+		int idx;
+
+		tag_clear(pathp[0].node, tag, pathp[0].offset);
+		for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+			if (pathp[0].node->tags[tag][idx])
+				goto out;
+		}
+		pathp--;
+	} while (pathp[0].node);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(radix_tree_tag_clear);
+
+#ifndef __KERNEL__	/* Only the test harness uses this at present */
+/**
+ *	radix_tree_tag_get - get a tag on a radix tree node
+ *	@root:		radix tree root
+ *	@index:		index key
+ *	@tag: 		tag index
+ *
+ *	Return the search tag corresponging to @index in the radix tree.
+ *
+ *	Returns zero if the tag is unset, or if there is no corresponding item
+ *	in the tree.
+ */
+int radix_tree_tag_get(struct radix_tree_root *root,
+			unsigned long index, int tag)
+{
+	unsigned int height, shift;
+	struct radix_tree_node **slot;
+	int saw_unset_tag = 0;
+
+	height = root->height;
+	if (index > radix_tree_maxindex(height))
+		return 0;
+
+	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+	slot = &root->rnode;
+
+	for ( ; ; ) {
+		int offset;
+
+		if (*slot == NULL)
+			return 0;
+
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+		/*
+		 * This is just a debug check.  Later, we can bale as soon as
+		 * we see an unset tag.
+		 */
+		if (!tag_get(*slot, tag, offset))
+			saw_unset_tag = 1;
+		if (height == 1) {
+			int ret = tag_get(*slot, tag, offset);
+
+			BUG_ON(ret && saw_unset_tag);
+			return ret;
+		}
+		slot = (struct radix_tree_node **)((*slot)->slots + offset);
+		shift -= RADIX_TREE_MAP_SHIFT;
+		height--;
+	}
+}
+EXPORT_SYMBOL(radix_tree_tag_get);
+#endif
+
+static unsigned int
 __lookup(struct radix_tree_root *root, void **results, unsigned long index,
 	unsigned int max_items, unsigned long *next_index)
 {
@@ -314,17 +512,6 @@ radix_tree_gang_lookup(struct radix_tree
 	unsigned long cur_index = first_index;
 	unsigned int ret = 0;
 
-	if (root->rnode == NULL)
-		goto out;
-	if (max_index == 0) {			/* Bah.  Special case */
-		if (first_index == 0) {
-			if (max_items > 0) {
-				*results = root->rnode;
-				ret = 1;
-			}
-		}
-		goto out;
-	}
 	while (ret < max_items) {
 		unsigned int nr_found;
 		unsigned long next_index;	/* Index of next search */
@@ -338,11 +525,101 @@ radix_tree_gang_lookup(struct radix_tree
 			break;
 		cur_index = next_index;
 	}
-out:
 	return ret;
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup);
 
+/*
+ * FIXME: the two tag_get()s here should use find_next_bit() instead of
+ * open-coding the search.
+ */
+static unsigned int
+__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
+	unsigned int max_items, unsigned long *next_index, int tag)
+{
+	unsigned int nr_found = 0;
+	unsigned int shift;
+	unsigned int height = root->height;
+	struct radix_tree_node *slot;
+
+	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+	slot = root->rnode;
+
+	while (height > 0) {
+		unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+		for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
+			if (tag_get(slot, tag, i)) {
+				BUG_ON(slot->slots[i] == NULL);
+				break;
+			}
+			index &= ~((1 << shift) - 1);
+			index += 1 << shift;
+			if (index == 0)
+				goto out;	/* 32-bit wraparound */
+		}
+		if (i == RADIX_TREE_MAP_SIZE)
+			goto out;
+		height--;
+		if (height == 0) {	/* Bottom level: grab some items */
+			unsigned long j = index & RADIX_TREE_MAP_MASK;
+
+			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
+				index++;
+				if (tag_get(slot, tag, j)) {
+					BUG_ON(slot->slots[j] == NULL);
+					results[nr_found++] = slot->slots[j];
+					if (nr_found == max_items)
+						goto out;
+				}
+			}
+		}
+		shift -= RADIX_TREE_MAP_SHIFT;
+		slot = slot->slots[i];
+	}
+out:
+	*next_index = index;
+	return nr_found;
+}
+
+/**
+ *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
+ *	                             based on a tag
+ *	@root:		radix tree root
+ *	@results:	where the results of the lookup are placed
+ *	@first_index:	start the lookup from this key
+ *	@max_items:	place up to this many items at *results
+ *	@tag:		the tag index
+ *
+ *	Performs an index-ascending scan of the tree for present items which
+ *	have the tag indexed by @tag set.  Places the items at *@results and
+ *	returns the number of items which were placed at *@results.
+ */
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+		unsigned long first_index, unsigned int max_items, int tag)
+{
+	const unsigned long max_index = radix_tree_maxindex(root->height);
+	unsigned long cur_index = first_index;
+	unsigned int ret = 0;
+
+	while (ret < max_items) {
+		unsigned int nr_found;
+		unsigned long next_index;	/* Index of next search */
+
+		if (cur_index > max_index)
+			break;
+		nr_found = __lookup_tag(root, results + ret, cur_index,
+					max_items - ret, &next_index, tag);
+		ret += nr_found;
+		if (next_index == 0)
+			break;
+		cur_index = next_index;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
+
 /**
  *	radix_tree_delete    -    delete an item from a radix tree
  *	@root:		radix tree root
@@ -355,24 +632,31 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 {
 	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+	struct radix_tree_path *orig_pathp;
 	unsigned int height, shift;
 	void *ret = NULL;
+	char tags[RADIX_TREE_TAGS];
+	int nr_cleared_tags;
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
 		goto out;
 
-	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
 	pathp->slot = &root->rnode;
 
 	while (height > 0) {
+		int offset;
+
 		if (*pathp->slot == NULL)
 			goto out;
 
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		pathp[1].offset = offset;
 		pathp[1].node = *pathp[0].slot;
 		pathp[1].slot = (struct radix_tree_node **)
-		    (pathp[1].node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK));
+				(pathp[1].node->slots + offset);
 		pathp++;
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
@@ -382,20 +666,67 @@ void *radix_tree_delete(struct radix_tre
 	if (ret == NULL)
 		goto out;
 
+	orig_pathp = pathp;
+
+	/*
+	 * Clear all tags associated with the just-deleted item
+	 */
+	memset(tags, 0, sizeof(tags));
+	do {
+		int tag;
+
+		nr_cleared_tags = RADIX_TREE_TAGS;
+		for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
+			int idx;
+
+			if (!tags[tag])
+				tag_clear(pathp[0].node, tag, pathp[0].offset);
+
+			for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+				if (pathp[0].node->tags[tag][idx]) {
+					tags[tag] = 1;
+					nr_cleared_tags--;
+					break;
+				}
+			}
+		}
+		pathp--;
+	} while (pathp[0].node && nr_cleared_tags);
+
+	pathp = orig_pathp;
 	*pathp[0].slot = NULL;
 	while (pathp[0].node && --pathp[0].node->count == 0) {
 		pathp--;
+		BUG_ON(*pathp[0].slot == NULL);
 		*pathp[0].slot = NULL;
-		radix_tree_node_free(pathp[1].node);
+		radix_tree_node_free(root, pathp[1].node);
 	}
-
 	if (root->rnode == NULL)
-		root->height = 0;  /* Empty tree, we can reset the height */
+		root->height = 0;
 out:
 	return ret;
 }
 EXPORT_SYMBOL(radix_tree_delete);
 
+/**
+ *	radix_tree_tagged - test whether any items in the tree are tagged
+ *	@root:		radix tree root
+ *	@tag:		tag to test
+ */
+int radix_tree_tagged(struct radix_tree_root *root, int tag)
+{
+	int idx;
+
+	if (!root->rnode)
+		return 0;
+	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+		if (root->rnode->tags[tag][idx])
+			return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(radix_tree_tagged);
+
 static void
 radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
 {

_
