The OpenNET Project
 
Search (keywords):  SOFT ARTICLES TIPS & TRICKS SECURITY
LINKS NEWS MAN DOCUMENTATION


"chat"


<< Previous INDEX Search src Set bookmark Go to bookmark Next >>
Date: Thu, 22 Oct 1998 16:25:46 -0400 (EDT)
From: Joseph S D Yao <[email protected]>
To: (Chuck W.) <[email protected]>
Subject: Re: "chat"
Cc: [email protected]

> Doesn't a finite state automaton reading one character at a time slow
> things down a bit too much? I thought about the same thing using getchar()
> or some derivative thereof, but I was afraid it would gum up the works a
> bit too much. I must admit I have never taken any metrics on the two. Do
> you have any code to pony up for a test?

Not in the least.  That's what you have buffered I/O for.  In fact, it's
often faster.  Consider that fgets() looks something like:

	while (cp < ep) {
		c = getchar();
		if (c == EOF)
			break;
		*cp++ = c;
		if (c == NL)
			break;
	}
	*cp = NUL;
	return(buf);

and, if you want to be safe, you then have to test for the existence of
a NL, and take appropriate actions (e.g., flush to EOL) if not.

Try this.  I needed to write something like this anyway.

============================= cut here ================================
/*********************************************************************\
**
** logsplit - split 'way-too-big log files with syslog-like dates.
**
** Syntax:
**	logsplit < logfile
**
** Description:
**	Splits the file into files named year00, year01, ..., breaking
**	every time it sees a new "Jan " not preceded by another "Jan ".
**
**	Uses FSA - states are location in "start" X value of "state"
**		X value of "match".  States are collapsed.
**
** $Log:$
**
** Files:
**	year??	- output files.
**
** Routines:
**	int main(int argc, char **argv, char **envp)
**
** Data:
**	typedef char bool;
**	typedef int boolean;
**	static char start[]	= "Jan ";
**	static char outfile[]	= "year\0\0";
**	char *myname	= "logsplit";
**
\*********************************************************************/

#ifndef	lint
  static char RCS_id[]	= "@(#)$Id:$";
#endif/*lint*/

/* Only Standard C Library calls used. */
#include <stdio.h>
#include <string.h>

/*
** Every literal the program needs is given a name below, so that the
** code itself contains no bare constants.
*/

/* Characters that the code refers to by name. */
#define NUL	'\0'
#define NL	'\n'
#define DIRC	'/'
#define FLAGC	'-'

/* Values of "state": what kind of line was most recently classified. */
enum {
	OUTJAN	= 0,		/* A non-Jan line has been seen. */
	INJAN	= 1,		/* A Jan line has been seen. */
	START	= 2		/* No line has been seen yet. */
};

/* Values of "match": progress of the comparison against start[]. */
enum {
	MATCHING	= 0,	/* Comparison still in progress. */
	NOT_MATCHING	= 1,	/* No comparison in progress. */
	MATCH_DONE	= 2	/* Transient: comparison just ended. */
};

/* Mode strings for fopen(). */
#define READ	"r"
#define WRITE	"w"
#define APPEND	"a"

/* Width of the year field in the file name, and how many values fit. */
enum {
	YRDIGS	= 2,
	YRMAX	= 100
};

/* Truth values. */
#define TRUE	(1)
#define FALSE	(0)

/*
** Two boolean types: "bool" where a small object is wanted, "boolean"
** for arguments and return values.
*/
typedef char bool;
typedef int boolean;

/* Text that marks the beginning of a line of interest. */
static char start[]	= "Jan ";

/*
** Output file name.  The extra NULs reserve room for the YRDIGS year
** digits that are filled in later.
*/
static char outfile[]	= "year\0\0";

/* Program name used in diagnostics. */
char *myname	= "logsplit";

/*
** main routine - reads argv[0] for 'myname', otherwise uses no args.
** always returns 0.
*/
int main(int argc, char **argv, char **envp)
{
	register char *cp, *ep;		/* pointers into start[] */
	register int state, match;	/* other state variables */
	register int c;			/* the character read. */
	char *yearptr;			/* where to save year #? */
	unsigned int year;		/* year number */
	FILE *outf;			/* output file handle */

#ifdef	lint
	/* Lint complains that envp's not used.  OK. */
	argv = envp;
	/* This code should never be seen by a compiler. */
#endif/*lint*/

	/* Get the program's name */
	if (argc > 0) {
		/* There is an argv[0].  Get the name from there. */
		myname = strrchr(*argv, DIRC);
		/*
		** If there is no DIRC, use the whole name; otherwise,
		** use what's after DIRC.
		*/
		if (myname == (char *) NULL)
			myname = *argv;
		  else
		  	++myname;
	}

	/* Initialize program variables. */
	year = 0;			/* Start with "year00". */
	yearptr = strchr(outfile, NUL);	/* Find First NUL. */
	outf = (FILE *) NULL;		/* No file yet. */

	/* cp points to start of start; ep points to NUL at end. */
	cp = start;
	for (ep = cp; *ep != NUL; ++ep);

	/* Initial states. */
	state = START;
	match = MATCHING;

	/*
	** Loop on reading one character at a time.  Do something.
	** Change state (values of state & match, location in start[])
	** depending on value.
	*/
	while ((c = getchar()) != EOF) {
		/*
		** If we're currently still matching, then try to match.
		*/
		if (match == MATCHING) {
			if (c != *cp++) {
				/* Not a match - change state only. */
				state = OUTJAN;
				match = MATCH_DONE;
			} else if (cp == ep) {
				/* Match! */

				/*
				** If we had previously seen a non-Jan
				** line, then if we had been writing to
				** a file [and we should have been],
				** close that file, and bump the year
				** up.
				*/
				if (state == OUTJAN) {
					if (outf != (FILE *) NULL) {
						(void) fclose(outf);
						outf = (FILE *) NULL;
						++year;
					}
				}

				/* Change the state. */
				state = INJAN;
				match = MATCH_DONE;
			}
		}

		/*
		** If we're still trying to match, don't write anything
		** out yet - it might be to the wrong file!
		*/
		if (match == MATCHING)
			continue;

		/*
		** If no file is currently open, create a name using the
		** current value of "year" [00-99], and create a file of
		** that name.  If any of that fails, complain, and break
		** out of this rut.
		*/
		if (outf == (FILE *) NULL) {
			/* Will the year fit? */
			if (year >= YRMAX) {
				fprintf(stderr,
					"%s: year is %d, too many years.\n",
					myname, year);
				break;
			}

			/* Create the file name. */
			(void) sprintf(yearptr, "%*.*u",
				YRDIGS, YRDIGS, year);

			/* Create the file. */
			outf = fopen(outfile, WRITE);

			/* Did it get created? */
			if (outf == (FILE *) NULL) {
				perror(myname);
				fprintf(stderr, "%s: Can't open \"%s\".",
					myname, outfile);
				break;
			}
		}

		/*
		** If we have only just now finished matching [or not,
		** as the case may be], write out the portion that
		** matched prior to the current character.  Then drop
		** the temporary state.
		*/
		if (match == MATCH_DONE) {
			if (--cp > start)
				(void) fwrite(start, 1, cp - start,
								outf);
			match = NOT_MATCHING;
		}

		/* Print the current character. */
		putc(c, outf);

		/*
		** If the current character is a NL, change the state to
		** start matching again with the next character.
		*/
		if (c == NL) {
			cp = start;
			match = MATCHING;
		}
	}

	/* Clean up.  Just because it's always a Good Idea(tm). */
	if (outf != (FILE *) NULL)
		(void) fclose(outf);

	/* Always return SUCCESS. */
	return(0);
}
============================= cut here ================================

--
/*********************************************************************\
**
** Joe Yao				[email protected] - Joseph S. D. Yao
**
\*********************************************************************/

<< Previous INDEX Search src Set bookmark Go to bookmark Next >>



Партнёры:
PostgresPro
Inferno Solutions
Hosting by Hoster.ru
Хостинг:

Закладки на сайте
Проследить за страницей
Created 1996-2025 by Maxim Chirkov
Добавить, Поддержать, Вебмастеру