/*
**  Copyright (c) 2006 Sendmail, Inc. and its suppliers.
**	All rights reserved.
**
**  $Id: util.c,v 1.31 2006/06/12 23:06:08 msk Exp $
*/

/*
**  Revision identification string for this file.  It is compiled in only
**  when "lint" is not defined.
*/

#ifndef lint
static char util_c_id[] = "@(#)$Id: util.c,v 1.31 2006/06/12 23:06:08 msk Exp $";
#endif /* !lint */

/* system includes */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <string.h>
#include <assert.h>
#include <syslog.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <ctype.h>

/* libsm includes */
#include <sm/string.h>

/* nesting-filter includes */
#include "nesting-filter.h"
#include "util.h"

/* smallest initial buffer size, in bytes, that string_new() will allocate */
#define	MINSTRINGLEN	16
/* smallest initial capacity, in elements, that vector_new() will allocate */
#define	MINVECTORLEN	16

/* lesser of two values; note that an argument may be evaluated twice */
#ifndef MIN
# define MIN(x,y)	((x) < (y) ? (x) : (y))
#endif /* ! MIN */

/*
**  TOKENS_NEW -- allocate a new TOKENS handle
**
**  Parameters:
**  	None.
**
**  Return value:
**  	A new TOKENS handle, or NULL on failure.
*/

TOKENS
tokens_new(void)
{
	int saved;
	TOKENS handle;

	handle = (TOKENS) malloc(sizeof *handle);
	if (handle == NULL)
		return NULL;

	/* buffer that will hold the text to be tokenized */
	handle->tok_data = string_new(0);
	if (handle->tok_data == NULL)
	{
		saved = errno;
		goto drop_handle;
	}

	/* array of token pointers handed back to callers */
	handle->tok_elems = vector_new(0);
	if (handle->tok_elems == NULL)
	{
		saved = errno;
		goto drop_data;
	}

	/* STRING objects that back the individual tokens */
	handle->tok_strings = vector_new(0);
	if (handle->tok_strings == NULL)
	{
		saved = errno;
		goto drop_elems;
	}

	return handle;

	/*
	**  Unwind in reverse order of construction.  errno was captured at
	**  the point of failure so that the cleanup calls can't disturb it.
	*/

  drop_elems:
	vector_free(handle->tok_elems);
  drop_data:
	string_free(handle->tok_data);
  drop_handle:
	free(handle);
	errno = saved;

	return NULL;
}

/*
**  TOKENS_FREE -- release resources associated with a TOKENS handle
**
**  Parameters:
**  	t -- TOKENS handle to be destroyed
**
**  Return value:
**  	None.
*/

void
tokens_free(TOKENS t)
{
	assert(t != NULL);

	/* release the members first ... */
	vector_free(t->tok_strings);
	vector_free(t->tok_elems);
	string_free(t->tok_data);

	/* ... and then the handle itself */
	free(t);
}

/*
**  TOKENS_GETDATA -- return the STRING associated with a TOKENS handle
**
**  Parameters:
**  	t -- TOKENS handle to be queried
**
**  Return value:
**  	The STRING handle stored in "t".
*/

STRING
tokens_getdata(TOKENS t)
{
	STRING data;

	assert(t != NULL);

	/* hand back the handle's own STRING, not a copy */
	data = t->tok_data;

	return data;
}

/*
**  TOKENS_GETTOKENS -- retrieve the list of tokens inside a TOKENS handle
**
**  Parameters:
**  	t -- TOKENS handle to be queried
**
**  Return value:
**  	A NULL-terminated array of pointers to the token strings stored in
**  	"t", as returned by vector_data(); NULL on error.
**
**  Notes:
**  	This will be an empty array if tokens_tokenize() hasn't been called
**  	yet.
*/

unsigned char **
tokens_gettokens(TOKENS t)
{
	unsigned char **elems;

	assert(t != NULL);

	/* vector_data() NULL-terminates the array before returning it */
	elems = (unsigned char **) vector_data(t->tok_elems);

	return elems;
}

/*
**  TOKENS_TOKENIZE -- tokenize a string into a vector of elements
**
**  Parameters:
**  	t -- TOKENS handle to be scanned
**  	toks -- array of token characters for this parse pass
**  	ctoks -- array of pairs of comment tokens (optional)
**
**  Return value:
**  	Number of tokens created, -1 on parse error, -2 on other error.
**
**  Notes:
**  	Creates an array of tokens based on the input string, which should
**  	be found in t->tok_data and thus should have already been populated
**  	by the caller.  It applies the usual escaping, quotation and whitespace
**  	semantics, tokenizing as it goes:
**
**  	- any character in "toks" is considered atomic and terminates the
**  	  current token; if there is no current token, a new one is created
**  	  containing only that token character
**  	- a '\' character escapes the next character but is itself discarded
**  	- a '"' character toggles the "quoted" state but is itself discarded
**  	- characters between balanced comment tokens are discarded
**  	- unescaped, unquoted whitespace terminates a token but does not
**  	  start a new one
**  	- unescaped comment tokens (if any) terminate the current token but
**  	  don't start a new one
**  	- any other character begins a token, or gets added to the previous one
**  	  if the previous one doesn't start with an atom
**
**  	Unbalanced comment characters or quotation marks cause a parse error
**  	to be reported.
*/

int
tokens_tokenize(TOKENS t, unsigned char *toks, unsigned char *ctoks)
{
	bool escaped;
	bool intoken;
	bool quoted;
	int comment;
	int len = 0;
	int c;
	int strings;
	unsigned char *p;
	STRING curtoken;

	assert(t != NULL);
	assert(toks != NULL);

	escaped = FALSE;
	quoted = FALSE;
	intoken = FALSE;
	comment = 0;
	strings = vector_length(t->tok_strings);

	for (c = 0; c < strings; c++)
	{
		curtoken = vector_index(t->tok_strings, c);
		string_blank(curtoken);
	}

	for (p = string_data(t->tok_data); *p != '\0'; p++)
	{
		/* comment delimiter? */
		if (!escaped && !quoted && ctoks != NULL)
		{
			unsigned int offset;
			unsigned char *q;

			q = strchr(ctoks, *p);
			if (q != NULL)
			{
				intoken = FALSE;

				offset = q - ctoks;

				if (offset % 2 == 1)
					comment--;
				else
					comment++;

				if (comment == -1)
					return -1;

				continue;
			}
		}

		/* inside a comment; ignore */
		if (comment > 0)
			continue;

		/* backslash; escape the next character */
		if (!escaped && *p == '\\')
		{
			escaped = TRUE;
			continue;
		}

		/* unescaped quotation mark; toggle "quoted" */
		if (!escaped && *p == '"')
		{
			quoted = !quoted;
			continue;
		}

		/* unescaped special character; it is its own token */
		if (!escaped && !quoted && strchr(toks, *p) != NULL)
		{
			if (len < strings)
			{
				curtoken = vector_index(t->tok_strings, len);
				string_blank(curtoken);
			}
			else
			{
				STRING new;

				new = string_new(0);
				if (new == NULL)
					return -2;

				if (vector_append(t->tok_strings, new) == -1)
					return -2;

				curtoken = new;
			}

			len++;

			if (string_cat1(curtoken, *p) == -1)
				return -2;

			intoken = FALSE;
		}

		/* not in a token */
		else if (!intoken)
		{
			/* if not whitespace, start a new token */
			if (isascii(*p) && !isspace(*p))
			{
				if (len < strings)
				{
					curtoken = vector_index(t->tok_strings,
					                        len);
					string_blank(curtoken);
				}
				else
				{
					STRING new;

					new = string_new(0);
					if (new == NULL)
						return -2;

					if (vector_append(t->tok_strings,
					                  new) == -1)
						return -2;

					curtoken = new;
				}

				len++;

				if (string_cat1(curtoken, *p) == -1)
					return -2;
				escaped = FALSE;
				intoken = TRUE;
			}
		}

		/* in a token */
		else
		{
			/* unquoted and unescaped whitespace ends a token */
			if (!quoted && !escaped && isascii(*p) && isspace(*p))
			{
				intoken = FALSE;
				continue;
			}

			/* anything else gets appended */
			if (string_cat1(curtoken, *p) == -1)
				return -2;
			escaped = FALSE;
		}
	}

	/* parse error if unbalanced quotes or comment characters found */
	if (quoted || comment != 0)
		return -1;

	vector_clear(t->tok_elems);
	for (c = 0; c < len; c++)
	{
		curtoken = vector_index(t->tok_strings, c);

		if (vector_append(t->tok_elems, string_data(curtoken)) == -1)
			return -2;
	}

	return len;
}

/*
**  VECTOR_REMOVE -- return and remove a specific entry from a VECTOR
**
**  Parameters:
**  	v -- VECTOR of interest
**  	idx -- index to be returned and removed
**
**  Return value:
**  	Pointer at "idx", or NULL if "idx" is out-of-range.
*/

void *
vector_remove(VECTOR v, int idx)
{
	int tail;
	void *ret;

	assert(v != NULL);

	if (idx < 0 || idx >= v->vector_len)
		return NULL;

	ret = v->vector_data[idx];

	/*
	**  Only the elements after "idx" slide down one slot.  Moving any
	**  more than that would read and write past the end of the live data.
	*/

	tail = v->vector_len - idx - 1;
	if (tail > 0)
	{
		memmove(&v->vector_data[idx], &v->vector_data[idx + 1],
		        sizeof(void *) * tail);
	}

	v->vector_len--;

	return ret;
}

/*
**  VECTOR_INDEX -- return a requested data element in a VECTOR
**
**  Parameters:
**  	v -- VECTOR of interest
**  	idx -- index to be returned
**
**  Return value:
**  	The value found in the idx-th element of VECTOR v.  If "idx"
**  	is out of range, NULL is always returned.
*/

void *
vector_index(VECTOR v, int idx)
{
	assert(v != NULL);

	/* only indices in [0, length) name an element */
	if (idx >= 0 && idx < vector_length(v))
		return v->vector_data[idx];

	return NULL;
}

/*
**  VECTOR_LENGTH -- report number of elements in a VECTOR
**
**  Parameters:
**  	v -- VECTOR of interest
**
**  Return value:
**  	Number of elements currently in the vector.
*/

int
vector_length(VECTOR v)
{
	int count;

	assert(v != NULL);

	/* number of elements in use, not the allocated capacity */
	count = v->vector_len;

	return count;
}

/*
**  VECTOR_APPEND -- add an element to a VECTOR
**
**  Parameters:
**  	v -- VECTOR to update
**  	p -- pointer to append
**
**  Return value:
**  	Number of elements in the vector after the append, or -1 on error.
*/

int
vector_append(VECTOR v, void *p)
{
	assert(v != NULL);

	/* array is full; double it before storing anything */
	if (v->vector_alloc == v->vector_len)
	{
		size_t newalloc;
		void **new;

		/*
		**  Refuse to grow if there is nothing to double, if the
		**  doubled element count would not fit in an int, or if
		**  the byte count would not fit in a size_t.
		*/

		if (v->vector_alloc <= 0 ||
		    v->vector_alloc > INT_MAX / 2 ||
		    (size_t) v->vector_alloc >
		    ((size_t) -1) / sizeof(void *) / 2)
			return -1;

		newalloc = (size_t) v->vector_alloc * 2;

		/* on failure the old array is untouched and still owned by v */
		new = (void **) realloc(v->vector_data,
		                        newalloc * sizeof(void *));
		if (new == NULL)
			return -1;

		v->vector_data = new;
		v->vector_alloc = newalloc;
	}

	v->vector_data[v->vector_len] = p;
	v->vector_len++;

	return v->vector_len;
}

/*
**  VECTOR_NEW -- allocate a new VECTOR
**
**  Parameters:
**  	init -- initial number of elements
**
**  Return value:
**  	A new VECTOR handle, or NULL on failure.
*/

VECTOR
vector_new(unsigned int init)
{
	VECTOR new;

	/* enforce the minimum capacity */
	if (init < MINVECTORLEN)
		init = MINVECTORLEN;

	/* element counts are reported as int, so larger requests can't work */
	if (init > INT_MAX)
	{
		errno = ENOMEM;
		return NULL;
	}

	new = (VECTOR) malloc(sizeof *new);
	if (new == NULL)
		return NULL;

	new->vector_alloc = init;
	new->vector_len = 0;

	/* calloc() rejects a count whose total byte size would overflow */
	new->vector_data = (void **) calloc(init, sizeof(void *));
	if (new->vector_data == NULL)
	{
		/* keep the allocator's errno across the free() */
		int save_errno = errno;

		free(new);
		errno = save_errno;
		return NULL;
	}

	return new;
}

/*
**  VECTOR_CLEAR -- reset the array of data items in a VECTOR
**
**  Parameters:
**  	v -- vector to be cleared
**
**  Return value:
**  	None.
*/

void
vector_clear(VECTOR v)
{
	assert(v != NULL);

	/* only the element count is reset; the array itself is left as is */
	v->vector_len = 0;
}

/*
**  VECTOR_DATA -- report the array of data items in a VECTOR
**
**  Parameters:
**  	v -- vector from which to retrieve the data array
**
**  Return value:
**  	A pointer to a NULL-terminated array of pointers stored in the
**  	vector; NULL on error.
*/

void *
vector_data(VECTOR v)
{
	assert(v != NULL);

	/*
	**  NULL-terminate the array.  Appending guarantees there is room
	**  for the terminator (growing the array if it is full) without
	**  peeking at a slot that may be uninitialized or out of bounds.
	**  Backing the length off again keeps the terminator from being
	**  counted as an element, so a later append simply overwrites it.
	*/

	if (vector_append(v, NULL) == -1)
		return NULL;
	v->vector_len--;

	return v->vector_data;
}

/*
**  VECTOR_FREE -- release resources in use by a VECTOR
**
**  Parameters:
**  	v -- vector to destroy
**
**  Return value:
**  	None.
*/

void
vector_free(VECTOR v)
{
	void **items;

	assert(v != NULL);

	/* note the array pointer before the handle goes away */
	items = v->vector_data;

	free(v);
	free(items);
}

/*
**  STRING_DATA -- report data in a STRING
**
**  Parameters:
**   	str -- STRING object from which to pull data
**
**  Return value:
**  	Pointer to the string stored in "str", NUL-terminated; NULL on error.
*/

unsigned char *
string_data(STRING str)
{
	int status;

	assert(str != NULL);

	/* append nothing; this fails only if string_cat() can't grow the buffer */
	status = string_cat(str, "\0");
	if (status == -1)
		return NULL;

	return str->str_data;
}

/*
**  STRING_BLANK -- recycle data in a STRING
**
**  Parameters:
**   	str -- STRING object to reset
**
**  Return value:
**  	None.
*/

void
string_blank(STRING str)
{
	assert(str != NULL);

	/* the buffer is kept; it just becomes an empty string */
	str->str_len = 0;
	str->str_data[0] = '\0';
}

/*
**  STRING_CAT -- append data to a STRING
**
**  Parameters:
**   	str -- STRING object to update
**  	app -- pointer to data to append
**
**  Return value:
**  	Length of data now in str, or -1 if insufficient storage was available.
*/

int
string_cat(STRING str, unsigned char *app)
{
	size_t applen;
	size_t need;

	assert(str != NULL);
	assert(app != NULL);

	applen = strlen((char *) app);

	/* lengths are reported as int; reject anything that would not fit */
	if (applen >= (size_t) INT_MAX - (size_t) str->str_len)
		return -1;

	/* existing data + appended data + terminating NUL */
	need = (size_t) str->str_len + applen + 1;

	if (need >= (size_t) str->str_alloc)
	{
		size_t alen;
		unsigned char *new;

		/* double the buffer unless that is too small or would overflow */
		alen = need;
		if (str->str_alloc <= INT_MAX / 2 &&
		    (size_t) str->str_alloc * 2 > need)
			alen = (size_t) str->str_alloc * 2;

		/* on failure the old buffer is untouched and still owned by str */
		new = (unsigned char *) realloc(str->str_data, alen);
		if (new == NULL)
			return -1;

		str->str_alloc = alen;
		str->str_data = new;
	}

	str->str_len = sm_strlcat(str->str_data, app, str->str_alloc);

	return str->str_len;
}

/*
**  STRING_CAT1 -- append one byte to a STRING
**
**  Parameters:
**   	str -- STRING object to update
**  	c -- character to append
**
**  Return value:
**  	Length of data now in str, or -1 if insufficient storage was available.
*/

int
string_cat1(STRING str, unsigned int c)
{
	unsigned char onechar[2];

	assert(str != NULL);
	assert(c < 256);

	/* wrap the byte up as a one-character C string */
	onechar[1] = '\0';
	onechar[0] = c;

	return string_cat(str, onechar);
}

/*
**  STRING_LENGTH -- return current size of a STRING
**
**  Parameters:
**  	str -- STRING object of interest
**
**  Return value:
**  	Current length of the string in "str".
*/

int
string_length(STRING str)
{
	int length;

	assert(str != NULL);

	/* length of the data, not the size of the buffer */
	length = str->str_len;

	return length;
}

/*
**  STRING_CHOP -- truncate a string at the desired length
**
**  Parameters:
**  	str -- STRING object to modify
**  	len -- desired new length
**
**  Return value:
**  	None.
*/

void
string_chop(STRING str, int len)
{
	assert(str != NULL);

	/* never lengthen the string ... */
	if (len > str->str_len)
		len = str->str_len;

	/* ... and never index before the start of the buffer */
	if (len < 0)
		len = 0;

	str->str_data[len] = '\0';
	str->str_len = len;
}

/*
**  STRING_FREE -- release data in a STRING
**
**  Parameters:
**   	str -- STRING object to destroy
**
**  Return value:
**  	None.
*/

void
string_free(STRING str)
{
	unsigned char *buf;

	assert(str != NULL);

	/* note the buffer pointer before the handle goes away */
	buf = str->str_data;

	free(str);
	free(buf);
}

/*
**  STRING_NEW -- allocate a STRING
**
**  Parameters:
**   	init -- initial size
**
**  Return value:
**  	A STRING handle appropriately initialized, or NULL on failure.
*/

STRING
string_new(int init)
{
	int size;
	int save_errno;
	STRING str;

	assert(init >= 0);

	/* enforce the minimum buffer size */
	size = (init > MINSTRINGLEN) ? init : MINSTRINGLEN;

	str = (STRING) malloc(sizeof *str);
	if (str == NULL)
		return NULL;

	str->str_data = (unsigned char *) malloc(size);
	if (str->str_data != NULL)
	{
		/* start out as an empty, terminated string */
		str->str_data[0] = '\0';
		str->str_len = 0;
		str->str_alloc = size;

		return str;
	}

	/* buffer allocation failed; keep its errno across the free() */
	save_errno = errno;
	free(str);
	errno = save_errno;

	return NULL;
}

/*
**  NF_SETMAXFD -- increase the file descriptor limit as much as possible
**
**  Parameters:
**  	None.
**
**  Return value:
**  	None.
*/

void
nf_setmaxfd()
{
	struct rlimit limits;

	/* find out what the limits currently are */
	if (getrlimit(RLIMIT_NOFILE, &limits) != 0)
	{
		syslog(LOG_WARNING, "getrlimit(): %s", strerror(errno));
		return;
	}

	/* raise the soft limit all the way up to the hard limit */
	limits.rlim_cur = limits.rlim_max;
	if (setrlimit(RLIMIT_NOFILE, &limits) == 0)
		return;

	/* failure is only logged; the caller carries on with the old limit */
	syslog(LOG_WARNING, "setrlimit(): %s", strerror(errno));
}

/*
**  NF_BLANKLINE -- report whether or not a line contains only whitespace
**                  or is zero-length
**
**  Parameters:
**  	str -- string to evaluate
**
**  Return value:
**  	TRUE iff the string provided is zero-length or contains only
**  	whitespace.
*/

bool
nf_blankline(unsigned char *str)
{
	unsigned char *cur;

	assert(str != NULL);

	/* skip over any run of ASCII whitespace */
	cur = str;
	while (*cur != '\0' && isascii(*cur) && isspace(*cur))
		cur++;

	/* blank iff the scan made it all the way to the terminator */
	return (*cur == '\0') ? TRUE : FALSE;
}

/*
**  NF_ISHSPACE -- return TRUE iff the provided character is a horizontal space
**
**  Parameters:
**  	c -- character to evaluate
**
**  Return value:
**  	TRUE iff "c" is a horizontal space of some kind.
*/

bool
nf_ishspace(unsigned int c)
{
	/* space and horizontal tab are the only horizontal spaces */
	return !(c != '\t' && c != ' ');
}

/*
**  NF_ISHEADER -- do like the MTA does to decide if an input line is a header
**
**  Parameters:
**  	str -- candidate string
**
**  Return value:
**  	TRUE iff "str" looks and smells like a header.
*/

bool
nf_isheader(unsigned char *str)
{
	unsigned char *s;

	assert(str != NULL);

	/* anything starting with "--" is rejected outright */
	if (str[0] == '-' && str[1] == '-')
		return FALSE;

	/* scan the candidate field name */
	for (s = str; *s > ' ' && *s != ':' && *s != '\0'; s++)
		continue;

	/* an empty field name is not a header */
	if (s == str)
		return FALSE;

	/* allow whitespace before the colon; technically violates RFC822 */
	for (; isascii(*s) && isspace(*s); s++)
		continue;

	return (*s == ':');
}
