
From: Paulo Marques <pmarques@grupopie.com>

This patch removes the is-exported bit introduced by the previous patch and
implements a complete type char, so that /proc/kallsyms more closely
resembles the System.map file.

In fact, if compiled with KALLSYMS_ALL the only differences between
/proc/kallsyms and System.map are the symbols that are left out on purpose:
types 'A' and 'U', and kallsyms_xxx.

I removed these symbols from System.map and diffed it against /proc/kallsyms,
and the files were completely identical :)

The System.map file occupied about 980KB, whereas the kallsyms data needed
to generate the same output occupied about 440KB.

Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/kernel/kallsyms.c  |   52 +++++++++++----------
 25-akpm/scripts/kallsyms.c |  108 ++++++++++++++++-----------------------------
 2 files changed, 67 insertions(+), 93 deletions(-)

diff -puN kernel/kallsyms.c~kallsyms-correct-type-char-in-proc-kallsyms kernel/kallsyms.c
--- 25/kernel/kallsyms.c~kallsyms-correct-type-char-in-proc-kallsyms	2004-09-02 20:17:58.411147624 -0700
+++ 25-akpm/kernel/kallsyms.c	2004-09-02 20:17:58.417146712 -0700
@@ -51,17 +51,16 @@ static inline int is_kernel_text(unsigne
    given the offset to where the symbol is in the compressed stream */
 static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
 {
-	int len;
+	int len, skipped_first = 0;
 	u8 *tptr, *data;
 
-	/* get the compressed symbol length from the first symbol byte,
-	 * masking out the "is_exported" bit */
+	/* get the compressed symbol length from the first symbol byte */
 	data = &kallsyms_names[off];
-	len = (*data) & 0x7F;
+	len = *data;
 	data++;
 
 	/* update the offset to return the offset for the next symbol on
-	   the compressed stream */
+	 * the compressed stream */
 	off += len + 1;
 
 	/* for every byte on the compressed symbol data, copy the table
@@ -72,8 +71,11 @@ static unsigned int kallsyms_expand_symb
 		len--;
 
 		while (*tptr) {
-			*result = *tptr;
-			result++;
+			if(skipped_first) {
+				*result = *tptr;
+				result++;
+			} else
+				skipped_first = 1;
 			tptr++;
 		}
 	}
@@ -84,24 +86,33 @@ static unsigned int kallsyms_expand_symb
 	return off;
 }
 
+/* get symbol type information. This is encoded as a single char at the
+ * beginning of the symbol name */
+static char kallsyms_get_symbol_type(unsigned int off)
+{
+	/* get just the first code, look it up in the token table, and return the
+	 * first char from this token */
+	return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
+}
+
+
 /* find the offset on the compressed stream given and index in the
-   kallsyms array */
+ * kallsyms array */
 static unsigned int get_symbol_offset(unsigned long pos)
 {
 	u8 *name;
 	int i;
 
-        /* use the closest marker we have. We have markers every
-           256 positions, so that should be close enough */
+	/* use the closest marker we have. We have markers every 256 positions,
+	 * so that should be close enough */
 	name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
 
-        /* sequentially scan all the symbols up to the point we're
-           searching for. Every symbol is stored in a
-	   [bit 7: is_exported | bits 6..0: <len>][<len> bytes of data]
-	   format, so we just need to add the len to the current
-	   pointer for every symbol we wish to skip */
+	/* sequentially scan all the symbols up to the point we're searching for.
+	 * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
+	 * just need to add the len to the current pointer for every symbol we
+	 * wish to skip */
 	for(i = 0; i < (pos&0xFF); i++)
-		name = name + ((*name) & 0x7F) + 1;
+		name = name + (*name) + 1;
 
 	return name - kallsyms_names;
 }
@@ -244,14 +255,7 @@ static unsigned long get_ksymbol_core(st
 	iter->owner = NULL;
 	iter->value = kallsyms_addresses[iter->pos];
 
-	if (is_kernel_text(iter->value) || is_kernel_inittext(iter->value))
-		iter->type = 't';
-	else
-		iter->type = 'd';
-
-	/* check the "is_exported" bit on the compressed stream */
-	if (kallsyms_names[off] & 0x80)
-		iter->type += 'A' - 'a';
+	iter->type = kallsyms_get_symbol_type(off);
 
 	off = kallsyms_expand_symbol(off, iter->name);
 
diff -puN scripts/kallsyms.c~kallsyms-correct-type-char-in-proc-kallsyms scripts/kallsyms.c
--- 25/scripts/kallsyms.c~kallsyms-correct-type-char-in-proc-kallsyms	2004-09-02 20:17:58.412147472 -0700
+++ 25-akpm/scripts/kallsyms.c	2004-09-02 20:17:58.418146560 -0700
@@ -55,7 +55,6 @@
 /* flags to mark symbols */
 #define SYM_FLAG_VALID		1
 #define SYM_FLAG_SAMPLED	2
-#define SYM_FLAG_EXPORTED	4
 
 struct sym_entry {
 	unsigned long long addr;
@@ -68,12 +67,9 @@ struct sym_entry {
 
 static struct sym_entry *table;
 static int size, cnt;
-static unsigned long long _stext, _etext, _sinittext, _einittext, _start_ksymtab, _stop_ksymtab;
+static unsigned long long _stext, _etext, _sinittext, _einittext;
 static int all_symbols = 0;
 
-/* aray of pointers into the symbol table sorted by name */
-static struct sym_entry **sorted_table;
-
 struct token {
 	unsigned char data[MAX_TOK_SIZE];
 	unsigned char len;
@@ -125,45 +121,56 @@ read_symbol(FILE *in, struct sym_entry *
 		_sinittext = s->addr;
 	else if (strcmp(str, "_einittext") == 0)
 		_einittext = s->addr;
-	else if (strcmp(str, "__start___ksymtab") == 0)
-		_start_ksymtab = s->addr;
-	else if (strcmp(str, "__stop___ksymtab") == 0)
-		_stop_ksymtab = s->addr;
 	else if (toupper(s->type) == 'A' || toupper(s->type) == 'U')
 		return -1;
 
-	s->sym = strdup(str);
-	s->len = strlen(str);
+	/* include the type field in the symbol name, so that it gets
+	 * compressed together */
+	s->len = strlen(str) + 1;
+	s->sym = (char *) malloc(s->len + 1);
+	strcpy(s->sym + 1, str);
+	s->sym[0] = s->type;
+
 	return 0;
 }
 
 static int
 symbol_valid(struct sym_entry *s)
 {
+	/* Symbols which vary between passes.  Passes 1 and 2 must have
+	 * identical symbol lists.  The kallsyms_* symbols below are only added
+	 * after pass 1, they would be included in pass 2 when --all-symbols is
+	 * specified so exclude them to get a stable symbol list.
+	 */
+	static char *special_symbols[] = {
+		"kallsyms_addresses",
+		"kallsyms_num_syms",
+		"kallsyms_names",
+		"kallsyms_markers",
+		"kallsyms_token_table",
+		"kallsyms_token_index",
+
+	/* Exclude linker generated symbols which vary between passes */
+		"_SDA_BASE_",		/* ppc */
+		"_SDA2_BASE_",		/* ppc */
+		NULL };
+	int i;
+
+	/* if --all-symbols is not specified, then symbols outside the text
+	 * and inittext sections are discarded */
 	if (!all_symbols) {
 		if ((s->addr < _stext || s->addr > _etext)
 		    && (s->addr < _sinittext || s->addr > _einittext))
 			return 0;
 	}
 
-	/* Exclude symbols which vary between passes.  Passes 1 and 2 must have
-	 * identical symbol lists.  The kallsyms_* symbols below are only added
-	 * after pass 1, they would be included in pass 2 when --all-symbols is
-	 * specified so exclude them to get a stable symbol list.
-	 */
-	if (strstr(s->sym, "_compiled.") ||
-	    strcmp(s->sym, "kallsyms_addresses") == 0 ||
-	    strcmp(s->sym, "kallsyms_num_syms") == 0 ||
-	    strcmp(s->sym, "kallsyms_names") == 0 ||
-	    strcmp(s->sym, "kallsyms_markers") == 0 ||
-	    strcmp(s->sym, "kallsyms_token_table") == 0 ||
-	    strcmp(s->sym, "kallsyms_token_index") == 0)
+	/* Exclude symbols which vary between passes. */
+	if (strstr(s->sym + 1, "_compiled."))
 		return 0;
 
-	/* Exclude linker generated symbols which vary between passes */
-	if (strcmp(s->sym, "_SDA_BASE_") == 0 ||	/* ppc */
-	    strcmp(s->sym, "_SDA2_BASE_") == 0)		/* ppc */
-		return 0;
+	for (i = 0; special_symbols[i]; i++)
+		if( strcmp(s->sym + 1, special_symbols[i]) == 0 )
+			return 0;
 
 	return 1;
 }
@@ -267,9 +274,7 @@ write_src(void)
 		if ((valid & 0xFF) == 0)
 			markers[valid >> 8] = off;
 
-		k = table[i].len;
-		if (table[i].flags & SYM_FLAG_EXPORTED) k |= 0x80;
-		printf("\t.byte 0x%02x", k);
+		printf("\t.byte 0x%02x", table[i].len);
 		for (k = 0; k < table[i].len; k++)
 			printf(", 0x%02x", table[i].sym[k]);
 		printf("\n");
@@ -463,47 +468,11 @@ static void forget_symbol(unsigned char 
 		forget_token(symbol + i, len - i);
 }
 
-static int symbol_sort(const void *a, const void *b)
-{
-	return strcmp( (*((struct sym_entry **) a))->sym,
-				(*((struct sym_entry **) b))->sym );
-}
-
-
-/* find out if a symbol is exported. Exported symbols have a corresponding
- * __ksymtab_<symbol> entry and their addresses are between __start___ksymtab
- * and __stop___ksymtab */
-static int is_exported(char *name)
-{
-	struct sym_entry key, *ksym, **result;
-	char buf[KSYM_NAME_LEN+32];
-
-	sprintf(buf, "__ksymtab_%s", name);
-	key.sym = buf;
-
-	ksym = &key;
-	result = bsearch(&ksym, sorted_table, cnt,
-				sizeof(struct sym_entry *), symbol_sort);
-
-	if(!result) return 0;
-
-	ksym = *result;
-
-	return ((ksym->addr >= _start_ksymtab) && (ksym->addr < _stop_ksymtab));
-}
-
 /* set all the symbol flags and do the initial token count */
 static void build_initial_tok_table(void)
 {
 	int i, use_it, valid;
 
-	/* build a sorted symbol pointer array so that searching a particular
-	 * symbol is faster */
-	sorted_table = (struct sym_entry **) malloc(sizeof(struct sym_entry *) * cnt);
-	for (i = 0; i < cnt; i++)
-		sorted_table[i] = &table[i];
-	qsort(sorted_table, cnt, sizeof(struct sym_entry *), symbol_sort);
-
 	valid = 0;
 	for (i = 0; i < cnt; i++) {
 		table[i].flags = 0;
@@ -515,6 +484,10 @@ static void build_initial_tok_table(void
 
 	use_it = 0;
 	for (i = 0; i < cnt; i++) {
+
+		/* subsample the available symbols. This method is almost like
+		 * a Bresenham's algorithm to get uniformly distributed samples
+		 * across the symbol table */
 		if (table[i].flags & SYM_FLAG_VALID) {
 
 			use_it += WORKING_SET;
@@ -523,9 +496,6 @@ static void build_initial_tok_table(void
 				table[i].flags |= SYM_FLAG_SAMPLED;
 				use_it -= valid;
 			}
-
-			if( is_exported(table[i].sym) )
-				table[i].flags |= SYM_FLAG_EXPORTED;
 		}
 		if (table[i].flags & SYM_FLAG_SAMPLED)
 			learn_symbol(table[i].sym, table[i].len);
_
