gulan · November 17, 2016 17:28
diff --git a/longest.c b/longest.c
 /*
 Display the length of the the longest line from text input.
 */

 /*
 [Skip the write-up as I did not follow it when writing the code.]

 A simple text file is an iteration of lines, followed by EOF.

    simple = body + EOF
    body = line*
    line = string + newline
    string = regular-char*

 I assume the file is ascii. [I need to stop making this assumption.] A
 regular char is any ascii char except newline and NUL. NUL is excluded
 because C uses it to indicate the end of a string.

 A problem with this definition is that a line may be arbitrarily
 long. But in C, we need to allocate the storage for the text of the
 line *before* reading it. There are a number of ways to overcome or at
 least mitigate this problem, but our first step is refine the
 description of the input file.

    not-so-simple = body + EOF
    body = line*
    line = normal-line | long-line
    normal-line = string + newline
    long-line = string + newline
    string = regular-char*

 Now we can see that our fundemetal difficultly is that we have a
 recogition problem; When we start to read a line, we do not know if it
 is a normal-line or a long-line.

 There are a number of ways to handle recognition problems.

 (1) pre-process the input
 (2) read-ahead
 (3) backtrack

 Pre-Process
 -----------
 We could scan the file and produce two output records for each line:

    recno | code | variable-part

 The recno is just a sequence number that identifies each line in the
 file. The code is either 'A' or 'B'. The A means that the variable
 part is a an integer giving the length of the line. Tke 'B' means that
 the variable part is the exact text of the original line.

 The scan would produce the record pairs in B then A order. That file
 would have to be sorted onto A then B order. The records are layed out
 to enable that sort.

 This solution works fine at the cost of an extra file pass and a sort
 step. In fact, this scheme was quite common in the bygone days of
 large sequential files and very small main memory.

 Read Ahead
 ----------
 If we buffer our read by 64K, our program can be written as if we
 already know which records are normal and which records are long.

 Backtracking
 ------------
 When we come to a new record, we do not know if it is normal or long,
 so we guess. We save the current state of the program, including our
 position in the input and output files, and guess that we have a normal
 line.

 If we were right, we can 'commit' the guess by writing any pending
 output and discarding the saved state.

 If we were wrong, we restore the saved state and go with the 'long'
 procedure.

 For this problem, the long procedure would just have rereading the
 same text. That leads naturally to an optimization of not discarding
 that text when backtracking. The result is that the backtracking
 solution looks very much like the read-ahead solution.

 [Maybe the same solution under the covers. I still see backtracking as
 a structural solution, but readahead as predicates on the state.]

 */

 #include<assert.h>
 #include<error.h>
 #include<stdio.h>
 #include<stdlib.h>
 #define MAXLINE 1000

 /* getline() fills the line buffer by reference. At the end, getline
 must indicate how many characters were added to the buffer and whether
 or not EOF was seen. Assuming that every line ends with a newline, EOL
 and EOF are mutally exclusive.

 If we include the newline in the buffer then even an empty line will
 have a positive length. We are free to use a length 0 to indicate
 EOF. I don't like this overloaded meaning, but it is a common C
 idiom.*/
 int
 getaline(char s[]) {
  int ch;
  int ix=0;
  
  ch = getchar();
  while (ch != EOF && ch != '\n' && ix < MAXLINE) {
    s[ix++] = ch;
    ch = getchar();
  }
  assert (ch == EOF || ch == '\n' || ix == MAXLINE);
  if (ix == MAXLINE) {
    error(99, 0, "maxline length exceeded");
  }
  if (ch == '\n')
    s[ix++] = '\n';
  /* A final NUL signals the end of the string. This NUL is not
  represented in the length.*/
  s[ix] = '\0';
  return ix;
 }

 int
 main(int argc, char *argv[]) {
  char line[MAXLINE];
  int len, maxl=0;
  
  len = getaline(line);
  while (len > 0) {
    if (len > maxl)
      maxl = len;
    len = getaline(line);
  }
  printf("%d\n", maxl);
  return 0;
 }
	/*
	Display the length of the the longest line from text input.
	*/

	/*
	[Skip the write-up as I did not follow it when writing the code.]

	A simple text file is an iteration of lines, followed by EOF.

	simple = body + EOF
	body = line*
	line = string + newline
	string = regular-char*

	I assume the file is ascii. [I need to stop making this assumption.] A
	regular char is any ascii char except newline and NUL. NUL is excluded
	because C uses it to indicate the end of a string.

	A problem with this definition is that a line may be arbitrarily
	long. But in C, we need to allocate the storage for the text of the
	line before reading it. There are a number of ways to overcome or at
	least mitigate this problem, but our first step is refine the
	description of the input file.

	not-so-simple = body + EOF
	body = line*
	line = normal-line \| long-line
	normal-line = string + newline
	long-line = string + newline
	string = regular-char*

	Now we can see that our fundemetal difficultly is that we have a
	recogition problem; When we start to read a line, we do not know if it
	is a normal-line or a long-line.

	There are a number of ways to handle recognition problems.

	(1) pre-process the input
	(2) read-ahead
	(3) backtrack

	Pre-Process
	-----------
	We could scan the file and produce two output records for each line:

	recno \| code \| variable-part

	The recno is just a sequence number that identifies each line in the
	file. The code is either 'A' or 'B'. The A means that the variable
	part is a an integer giving the length of the line. Tke 'B' means that
	the variable part is the exact text of the original line.

	The scan would produce the record pairs in B then A order. That file
	would have to be sorted onto A then B order. The records are layed out
	to enable that sort.

	This solution works fine at the cost of an extra file pass and a sort
	step. In fact, this scheme was quite common in the bygone days of
	large sequential files and very small main memory.

	Read Ahead
	----------
	If we buffer our read by 64K, our program can be written as if we
	already know which records are normal and which records are long.

	Backtracking
	------------
	When we come to a new record, we do not know if it is normal or long,
	so we guess. We save the current state of the program, including our
	position in the input and output files, and guess that we have a normal
	line.

	If we were right, we can 'commit' the guess by writing any pending
	output and discarding the saved state.

	If we were wrong, we restore the saved state and go with the 'long'
	procedure.

	For this problem, the long procedure would just have rereading the
	same text. That leads naturally to an optimization of not discarding
	that text when backtracking. The result is that the backtracking
	solution looks very much like the read-ahead solution.

	[Maybe the same solution under the covers. I still see backtracking as
	a structural solution, but readahead as predicates on the state.]

	*/

	#include<assert.h>
	#include<error.h>
	#include<stdio.h>
	#include<stdlib.h>
	#define MAXLINE 1000

	/* getline() fills the line buffer by reference. At the end, getline
	must indicate how many characters were added to the buffer and whether
	or not EOF was seen. Assuming that every line ends with a newline, EOL
	and EOF are mutally exclusive.

	If we include the newline in the buffer then even an empty line will
	have a positive length. We are free to use a length 0 to indicate
	EOF. I don't like this overloaded meaning, but it is a common C
	idiom.*/
	int
	getaline(char s[]) {
	int ch;
	int ix=0;

	ch = getchar();
	while (ch != EOF && ch != '\n' && ix < MAXLINE) {
	s[ix++] = ch;
	ch = getchar();
	}
	assert (ch == EOF \|\| ch == '\n' \|\| ix == MAXLINE);
	if (ix == MAXLINE) {
	error(99, 0, "maxline length exceeded");
	}
	if (ch == '\n')
	s[ix++] = '\n';
	/* A final NUL signals the end of the string. This NUL is not
	represented in the length.*/
	s[ix] = '\0';
	return ix;
	}

	int
	main(int argc, char *argv[]) {
	char line[MAXLINE];
	int len, maxl=0;

	len = getaline(line);
	while (len > 0) {
	if (len > maxl)
	maxl = len;
	len = getaline(line);
	}
	printf("%d\n", maxl);
	return 0;
	}
No results found