kragen · December 12, 2008 22:36
diff --git a/kwic-shifts.c b/kwic-shifts.c
 /* produce the cyclic shifts of each input line -*- coding: utf-8 -*- */
 /* A sort of rebuttal to “Yannis’s Law”.  Yannis quotes from [Parnas
 * 1972] (http://sunnyday.mit.edu/16.355/parnas-criteria.html):
 * 
 * > The KWIC index system accepts an ordered set of lines, each line
 * > is an ordered set of words, and each word is an ordered set of
 * > characters. Any line may be "circularly shifted" by repeatedly
 * > removing the first word and appending it at the end of the
 * > line. The KWIC index system outputs a listing of all circular
 * > shifts of all lines in alphabetical order. This is a small
 * > system. Except under extreme circumstances (huge data base, no
 * > supporting software), such a system could be produced by a good
 * > programmer within a week or two.
 *
 * Then he comments:
 *
 * > The year is 2003 and I would not consider a programmer to be
 * > good (this includes familiarity with tools) if they cannot
 * > produce the KWIC system within *an hour or two*, instead of a
 * > week or two in 1972. ... This impressive progress is arguably the
 * > cumulative result of reusable software entities, better system
 * > tools, better programming languages, better CS education, but
 * > also good use of faster machines that allow us to ignore
 * > low-level overheads and favor slightly less efficient but
 * > convenient solutions.
 * 
 * (Actually Parnas wrote his paper in 1971, not 1972.)
 *
 * Well, this program solves the KWIC indexing problem as stated (for
 * a particularly simpleminded definition of “word”) when piped to a
 * system sort routine, and it took me from 19:20:49 to 19:35:18 one
 * evening to write and debug.  (I added the line numbers later,
 * though.)  I compiled it a total of 8 times, including adding the
 * line numbers.  I had two bugs: I wasn’t checking to make sure I
 * wasn’t running past the end of the input buffer when advancing to
 * the next word, and I wasn’t removing the newline from the input
 * line.  I didn’t use a modern IDE; I used Emacs and ran “make
 * kwic-shifts CFLAGS=-Wall” in another window.
 *
 * It depends on the following system facilities:
 * - convenient decimal formatting via `printf`
 * - `fgets`, `fputs`, `putchar` (i.e. I don’t have to do my own I/O
 *   buffering)
 * - `isspace()`
 * - having a system sort program
 * - a compiler for a terse low-level language such as C
 *
 * I admit that C, Emacs, and GCC are all somewhat nicer tools than I
 * would have had in 1971, and additionally this isn’t the first time
 * I’ve solved this problem (although it's the first time I’ve solved
 * it in C).  But, if I had access to a timesharing system with a
 * more-or-less working compiler for a language of a similar level to
 * C (say, BLISS-11, or Forth, or BCPL, or Algol-60, or PL/1, or a
 * reasonable FORTRAN) with the very minimal system facilities I have
 * outlined above, I could do it in about the same time.  Maybe it
 * would have taken me half an hour or an hour instead of 14 minutes
 * and 29 seconds, and I would have spent more time desk-checking and
 * less time testing, especially if the system didn’t have memory
 * protection.  And the result is reasonably efficient; Parnas didn’t
 * consider as essential, in his paper, tricks like interning the
 * words to get by on a two-byte array index per word, or even using a
 * temporary file to store the circular shifts, which allows you to
 * handle input files of arbitrary size.
 *
 * So I think Yannis’s law is just wrong.  It’s true that you ought to
 * be able to solve that problem in less than a week and a half; maybe
 * five minutes in Python.  But you also should have been able to
 * solve it in less than an hour in 1971, if you’re a good programmer.
 * I mean, shit, I'm not a good programmer, and it took me less than
 * 15 minutes.
 * 
 * So why does Parnas think this system, as he describes it, should
 * take a “good programmer” a week or two to build?  Maybe the
 * programmers he had to work with weren’t very good, or maybe they
 * were using a batch-processing system.
 */ 

 #include <stdio.h>
 #include <ctype.h>
 char buf[BUFSIZ];

 void output_cyclic_shift(char *buf, char *pos, int lineno) {
  char *s;
  fputs(pos, stdout);
  if (pos > buf) putchar(' ');
  for (s = buf; s < pos; s++) putchar(*s);
  putchar(' ');
  printf("%d\n", lineno);
 }

 int main(int argc, char **argv) {
  char *s;
  int lineno = 0;
  while (fgets(buf, BUFSIZ, stdin)) {
    lineno++;
    for (s = buf; *s; s++);
    if (*--s == '\n') *s = '\0';
    for (s = buf; *s && (s - buf < BUFSIZ); ) {
      for (; *s && (s-buf < BUFSIZ); s++) if (!isspace(*s)) break;
      output_cyclic_shift(buf, s, lineno);
      for (; *s && (s-buf < BUFSIZ); s++) if (isspace(*s)) break;
    }
  }
  return 0;
 }
	/* produce the cyclic shifts of each input line -- coding: utf-8 -- */
	/* A sort of rebuttal to “Yannis’s Law”. Yannis quotes from [Parnas
	* 1972] (http://sunnyday.mit.edu/16.355/parnas-criteria.html):
	*
	* > The KWIC index system accepts an ordered set of lines, each line
	* > is an ordered set of words, and each word is an ordered set of
	* > characters. Any line may be "circularly shifted" by repeatedly
	* > removing the first word and appending it at the end of the
	* > line. The KWIC index system outputs a listing of all circular
	* > shifts of all lines in alphabetical order. This is a small
	* > system. Except under extreme circumstances (huge data base, no
	* > supporting software), such a system could be produced by a good
	* > programmer within a week or two.
	*
	* Then he comments:
	*
	* > The year is 2003 and I would not consider a programmer to be
	* > good (this includes familiarity with tools) if they cannot
	* > produce the KWIC system within an hour or two, instead of a
	* > week or two in 1972. ... This impressive progress is arguably the
	* > cumulative result of reusable software entities, better system
	* > tools, better programming languages, better CS education, but
	* > also good use of faster machines that allow us to ignore
	* > low-level overheads and favor slightly less efficient but
	* > convenient solutions.
	*
	* (Actually Parnas wrote his paper in 1971, not 1972.)
	*
	* Well, this program solves the KWIC indexing problem as stated (for
	* a particularly simpleminded definition of “word”) when piped to a
	* system sort routine, and it took me from 19:20:49 to 19:35:18 one
	* evening to write and debug. (I added the line numbers later,
	* though.) I compiled it a total of 8 times, including adding the
	* line numbers. I had two bugs: I wasn’t checking to make sure I
	* wasn’t running past the end of the input buffer when advancing to
	* the next word, and I wasn’t removing the newline from the input
	* line. I didn’t use a modern IDE; I used Emacs and ran “make
	* kwic-shifts CFLAGS=-Wall” in another window.
	*
	* It depends on the following system facilities:
	* - convenient decimal formatting via `printf`
	* - `fgets`, `fputs`, `putchar` (i.e. I don’t have to do my own I/O
	* buffering)
	* - `isspace()`
	* - having a system sort program
	* - a compiler for a terse low-level language such as C
	*
	* I admit that C, Emacs, and GCC are all somewhat nicer tools than I
	* would have had in 1971, and additionally this isn’t the first time
	* I’ve solved this problem (although it's the first time I’ve solved
	* it in C). But, if I had access to a timesharing system with a
	* more-or-less working compiler for a language of a similar level to
	* C (say, BLISS-11, or Forth, or BCPL, or Algol-60, or PL/1, or a
	* reasonable FORTRAN) with the very minimal system facilities I have
	* outlined above, I could do it in about the same time. Maybe it
	* would have taken me half an hour or an hour instead of 14 minutes
	* and 29 seconds, and I would have spent more time desk-checking and
	* less time testing, especially if the system didn’t have memory
	* protection. And the result is reasonably efficient; Parnas didn’t
	* consider as essential, in his paper, tricks like interning the
	* words to get by on a two-byte array index per word, or even using a
	* temporary file to store the circular shifts, which allows you to
	* handle input files of arbitrary size.
	*
	* So I think Yannis’s law is just wrong. It’s true that you ought to
	* be able to solve that problem in less than a week and a half; maybe
	* five minutes in Python. But you also should have been able to
	* solve it in less than an hour in 1971, if you’re a good programmer.
	* I mean, shit, I'm not a good programmer, and it took me less than
	* 15 minutes.
	*
	* So why does Parnas think this system, as he describes it, should
	* take a “good programmer” a week or two to build? Maybe the
	* programmers he had to work with weren’t very good, or maybe they
	* were using a batch-processing system.
	*/

	#include <stdio.h>
	#include <ctype.h>
	char buf[BUFSIZ];

	void output_cyclic_shift(char buf, char pos, int lineno) {
	char *s;
	fputs(pos, stdout);
	if (pos > buf) putchar(' ');
	for (s = buf; s < pos; s++) putchar(*s);
	putchar(' ');
	printf("%d\n", lineno);
	}

	int main(int argc, char **argv) {
	char *s;
	int lineno = 0;
	while (fgets(buf, BUFSIZ, stdin)) {
	lineno++;
	for (s = buf; *s; s++);
	if (--s == '\n') s = '\0';
	for (s = buf; *s && (s - buf < BUFSIZ); ) {
	for (; s && (s-buf < BUFSIZ); s++) if (!isspace(s)) break;
	output_cyclic_shift(buf, s, lineno);
	for (; s && (s-buf < BUFSIZ); s++) if (isspace(s)) break;
	}
	}
	return 0;
	}
No results found