Created
April 12, 2012 23:19
-
-
Save RavuAlHemio/2371810 to your computer and use it in GitHub Desktop.
mysed
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * @file mysed.c
 *
 * @brief An oversimplified implementation of sed.
 *
 * @author Ondřej Hošek <[email protected]>
 */
#include <assert.h> | |
#include <errno.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <unistd.h> | |
/**
 * Number of bytes (terminating NUL included) requested from fgets() per
 * read when pulling input text into the line buffer.
 */
#define LINE_BUF_SIZE 1024
/**
 * Program name used as a prefix in diagnostics.
 *
 * main() overwrites this with argv[0] when one is available; until then
 * the placeholder below is used. It lives at file scope because it is
 * written once at start-up and only read afterwards.
 */
static const char *progname = "(null)";
/**
 * One textual substitution: every occurrence of oldstr is to be replaced
 * by newstr.
 */
struct sub_s
{
    /** Text to search for. */
    char *oldstr;

    /** Text that takes the place of each match. */
    char *newstr;
};

/** Convenience alias for struct sub_s. */
typedef struct sub_s sub_t;
/** | |
* Prints a message on how to execute this program and exits with a | |
* failure code. | |
* | |
* @warning This function does not return. | |
*/ | |
static void usage(void) | |
{ | |
(void)fprintf(stderr, "Usage: %s [-e script]... [file]...\n", progname); | |
exit(EXIT_FAILURE); | |
} | |
/** | |
* Frees the substitutions in the given array, and then the array itself. | |
* | |
* @param subs The array to free. | |
* @param nsubs Number of elements stored in the subs array. | |
*/ | |
static void free_subs(sub_t *subs, size_t nsubs) | |
{ | |
size_t i; | |
if (subs == NULL) | |
{ | |
return; | |
} | |
for (i = 0; i < nsubs; ++i) | |
{ | |
free(subs[i].oldstr); | |
free(subs[i].newstr); | |
} | |
free(subs); | |
} | |
/**
 * Checks that a script has the only form this program understands:
 * "s/oldstr/newstr/g" with a non-empty oldstr and no slash inside either
 * string.
 *
 * @param scrstr Script string to check.
 * @return true if the script is well-formed, false otherwise.
 */
static bool script_valid(const char *scrstr)
{
    const size_t len = strlen(scrstr);
    const char *slash;
    size_t nslashes = 0;

    /* the shortest acceptable script is "s/x//g" */
    if (len < 6)
    {
        return false;
    }

    /* must open with "s/" and the search string must not be empty */
    if (strncmp(scrstr, "s/", 2) != 0 || scrstr[2] == '/')
    {
        return false;
    }

    /* must close with "/g" */
    if (strcmp(scrstr + (len - 2), "/g") != 0)
    {
        return false;
    }

    /* exactly three separators overall */
    for (slash = strchr(scrstr, '/');
         slash != NULL;
         slash = strchr(slash + 1, '/'))
    {
        ++nslashes;
    }

    return nslashes == 3;
}
/** | |
* Prints a standard-format error message to standard error. | |
* | |
* @param funcname Name of the function that failed. | |
*/ | |
static void print_failure(const char *funcname) | |
{ | |
(void)fprintf(stderr, "%s: %s: %s\n", progname, funcname, strerror(errno)); | |
} | |
/**
 * Counts the non-overlapping occurrences of one string in another,
 * scanning from left to right.
 *
 * @param haystack The string in which to count the occurrences.
 * @param needle   The string whose occurrences to count.
 * @return Number of non-overlapping occurrences of needle in haystack;
 *         0 if needle is the empty string.
 */
static size_t count_occs(const char *haystack, const char *needle)
{
    const size_t needlelen = strlen(needle);
    const char *occ;
    size_t ret = 0;

    /*
     * strstr() matches an empty needle at the current position, so the
     * scan below would never advance. Treat it as "no occurrences".
     */
    if (needlelen == 0)
    {
        return 0;
    }

    while ((occ = strstr(haystack, needle)) != NULL)
    {
        ++ret;
        /* resume right behind this match so matches never overlap */
        haystack = occ + needlelen;
    }

    return ret;
}
/** | |
* Perform the given substitutions in order on the given file. | |
* | |
* @warning If a line is longer than LINE_BUF_SIZE-2 characters, the | |
* behavior is undefined. | |
* | |
* @param f File on which to perform substitutions. | |
* @param subs The substitutions to perform, in order. | |
* @param nsubs The number of substitutions to perform. | |
*/ | |
static void perform_subs(FILE *f, sub_t *subs, size_t nsubs) | |
{ | |
char *oldbuf, *newbuf; | |
char *oldwalker, *newwalker; | |
char *occ; | |
size_t i, subcount, newbuflen, copylen; | |
/* initial buffer allocation */ | |
oldbuf = malloc(LINE_BUF_SIZE); | |
if (oldbuf == NULL) | |
{ | |
print_failure("malloc"); | |
free_subs(subs, nsubs); | |
exit(EXIT_FAILURE); | |
} | |
/* read lines */ | |
while (fgets(oldbuf, LINE_BUF_SIZE, f) != NULL) | |
{ | |
/* perform substitutions */ | |
for (i = 0; i < nsubs; ++i) | |
{ | |
/* calculate size of new buffer */ | |
subcount = count_occs(oldbuf, subs[i].oldstr); | |
newbuflen = strlen(oldbuf) + subcount*strlen(subs[i].newstr) - subcount*strlen(subs[i].oldstr); | |
/* allocate new buffer */ | |
newbuf = malloc(newbuflen+1); | |
if (newbuf == NULL) | |
{ | |
print_failure("malloc"); | |
free_subs(subs, nsubs); | |
exit(EXIT_FAILURE); | |
} | |
/* prepare walkers */ | |
oldwalker = &oldbuf[0]; | |
newwalker = &newbuf[0]; | |
/* while there still is an occurrence */ | |
while ((occ = strstr(oldwalker, subs[i].oldstr)) != NULL) | |
{ | |
/* calculate number of chars to copy */ | |
copylen = occ - oldwalker; | |
/* copy text up and until this occurrence */ | |
(void)strncpy(newwalker, oldwalker, copylen); | |
/* advance walkers */ | |
oldwalker += copylen; | |
newwalker += copylen; | |
/* copy new string instead of the old one */ | |
(void)strncpy(newwalker, subs[i].newstr, strlen(subs[i].newstr)); | |
/* advance walkers (differently!) */ | |
newwalker += strlen(subs[i].newstr); | |
oldwalker += strlen(subs[i].oldstr); | |
} | |
/* copy the rest */ | |
(void)strncpy(newwalker, oldwalker, strlen(oldwalker)); | |
newwalker += strlen(oldwalker); | |
/* NUL-terminate -.- */ | |
newwalker[0] = '\0'; | |
/* swizzle buffers */ | |
free(oldbuf); | |
oldbuf = newbuf; | |
} | |
/* output the completely replaced line */ | |
(void)fputs(oldbuf, stdout); | |
/* renew the buffer */ | |
free(oldbuf); | |
oldbuf = malloc(LINE_BUF_SIZE); | |
if (oldbuf == NULL) | |
{ | |
print_failure("malloc"); | |
free_subs(subs, nsubs); | |
exit(EXIT_FAILURE); | |
} | |
} | |
if (ferror(f)) | |
{ | |
(void)fprintf(stderr, "%s: I/O error\n", progname); | |
free(oldbuf); | |
free_subs(subs, nsubs); | |
exit(EXIT_FAILURE); | |
} | |
/* the final step */ | |
free(oldbuf); | |
} | |
/** | |
* The main entry point of the program. | |
* | |
* @param argc Number of command-line arguments. | |
* @param argv The command-line arguments themselves. | |
* @return Exit code of the program -- zero if successful, | |
* nonzero otherwise. | |
*/ | |
int main(int argc, char **argv) | |
{ | |
int opt; | |
sub_t *subs = NULL; | |
size_t i, subcount = 0; | |
/* set program name */ | |
if (argc > 0) | |
{ | |
progname = argv[0]; | |
} | |
/* parse args */ | |
while ((opt = getopt(argc, argv, "e:")) != -1) | |
{ | |
switch (opt) | |
{ | |
case 'e': | |
{ | |
sub_t *newsubs; | |
sub_t *thenewsub; | |
size_t oldlen, newlen; | |
/* validate */ | |
if (!script_valid(optarg)) | |
{ | |
(void)fprintf(stderr, "%s: Invalid script '%s'\n", progname, optarg); | |
free_subs(subs, subcount); | |
exit(EXIT_FAILURE); | |
} | |
/* calculate string lengths */ | |
oldlen = strcspn(optarg+2, "/"); /* skip s/ */ | |
newlen = strcspn(optarg+oldlen+3, "/"); /* skip s/<oldstr>/ */ | |
/* skip identity substitutions */ | |
if (oldlen == newlen && strncmp(optarg+2, optarg+oldlen+3, oldlen) == 0) | |
{ | |
continue; | |
} | |
/* resize array */ | |
++subcount; | |
newsubs = realloc(subs, subcount*sizeof(*newsubs)); | |
if (newsubs == NULL) | |
{ | |
print_failure("realloc"); | |
free_subs(subs, subcount); | |
exit(EXIT_FAILURE); | |
} | |
subs = newsubs; | |
/* store a "shortcut" */ | |
thenewsub = &subs[subcount-1]; | |
/* allocate space for the strings */ | |
thenewsub->oldstr = malloc(oldlen+1); | |
if (thenewsub->oldstr == NULL) | |
{ | |
print_failure("malloc"); | |
thenewsub->newstr = NULL; /* so that free_subs works */ | |
free_subs(subs, subcount); | |
exit(EXIT_FAILURE); | |
} | |
thenewsub->newstr = malloc(newlen+1); | |
if (thenewsub->newstr == NULL) | |
{ | |
print_failure("malloc"); | |
free_subs(subs, subcount); | |
exit(EXIT_FAILURE); | |
} | |
/* copy the strings */ | |
(void)strncpy(thenewsub->oldstr, optarg+2, oldlen); | |
(void)strncpy(thenewsub->newstr, optarg+oldlen+3, newlen); | |
/* NUL-terminate because strncpy was specified by monkeys on opium */ | |
thenewsub->oldstr[oldlen] = '\0'; | |
thenewsub->newstr[newlen] = '\0'; | |
break; | |
} | |
case '?': | |
free_subs(subs, subcount); | |
usage(); | |
break; | |
default: | |
assert(0 && "default case of getopt switch"); | |
} | |
} | |
/* calculate number of files */ | |
if (optind < argc) | |
{ | |
/* there are files */ | |
for (i = optind; i < argc; ++i) | |
{ | |
FILE *f = fopen(argv[i], "r"); | |
if (f == NULL) | |
{ | |
print_failure("fopen"); | |
free_subs(subs, subcount); | |
exit(EXIT_FAILURE); | |
} | |
perform_subs(f, subs, subcount); | |
(void)fclose(f); | |
} | |
} | |
else | |
{ | |
/* no files; read stdin */ | |
perform_subs(stdin, subs, subcount); | |
} | |
free_subs(subs, subcount); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment