/*
 * The C Programming Language
 *
 * Chapter 5 - Pointers and Arrays 17
 *
 * mmresult 2017. 12. 22. 10:40
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/* identification strings printed by print_help */
#define AUTHOR "Robert Taylor"
#define CREATION_DATE "May, 2014"
#define LAST_UPDATE __DATE__ /* last date that binary was compiled */
/* argv[0]; stored by parse_args and printed by print_help */
static char *program_name;
/* My solution to Exercise 5-17
 * of the C Programming Language (second edition)
 * by Brian W. Kernighan
 * and Dennis M. Ritchie
 *
 * To compile:
 * gcc -Os -Wall -s -o sort sort.c
 *
 * For help:
 * ./sort --h
 */

#define MAXLEN 1000 /* size of the line buffers; getLine stores at most
       MAXLEN - 1 characters of a line */
#define MAXLINES 500000 /* max # of lines to be sorted */
#define ALLOCSIZE 15000000 /* size of space to store lines */
#define NUMDIGITS 5 /* maximum number of characters that parse_args
       reads for a numeric option value */
static char *lineptr[MAXLINES]; /* pointers to text lines */

/* input and output of whole lines */
static int getLine(char *s, int lim);
static int readlines(char *lineptr[], int nlines);
static void writelines(char *lineptr[], int nlines);
/* sorting and comparison */
static void my_qsort(void *lineptr[], int left, int right, int (*comp)(const void*, const void*));
static int numcmp(const char *s1, const char *s2);
static int str_cmp(const char *s1, const char *s2);
static int compare(const char *s1, const char *s2);
static void swap(void *v[], int i, int j);
/* command line handling and field records */
static int parse_args(int argc, char *argv[]);
static void recordinit(void); /* initialize each new field record */
static void dump_field_records(void); /* dump field records for analysis */
static void print_help(void); /* provide help on program usage */
/* field extraction: substr works on character offsets, substr_delim on
 * delimited fields; use_record loads a field record and extracts that
 * field from both lines into line1 and line2 */
static void substr(char *s1, const char *s2, int p1, int p2);
static void use_record(const char *s1, const char *s2, int n);
static void substr_delim(char *s1, const char *s2, int n);
/* field sampling output (used instead of sorting) */
static void dump_parsed_fields(char *lineptr[], int nlines);
static void dump_parsed_fields_xml(char *lineptr[], int nlines);

/* Sort options of the field record currently in use. parse_args sets them
 * while reading options and use_record overwrites them with the stored
 * values of a record before each field comparison. */
static int reverse = 0; /* 1 if sort in reverse order */
static int fold = 0; /* if fold = 1 it means case insensitive sort */
static int directory = 0; /* if directory = 1 str_cmp compares only letters,
        digits and white space */
static int numeric = 0; /* 1 if numeric sort */
static int num_lines_to_ignore = 0; /* number of leading input lines that
        readlines leaves out of the sort */
static char line1[MAXLEN];/* two lines for comparison, possibly substrings */
static char line2[MAXLEN];/* of the lines read */
static int sample_field_parse = 0; /* if 1, output the data from the fields
          defined, one field per line, no sorting
          is performed in this case*/
static int dump_as_xml = 0; /* when dumping parsed fields, format as xml data */
static int include_line = 0; /* when dumping fields, output source line
    as well. */

/* Field records.
 *
 * Each sort field is described by RECORDSIZE consecutive ints stored in
 * fieldinfo; record k starts at fieldinfo[k * RECORDSIZE]. The macros below
 * are the offsets of the individual values inside one record. Offsets 1 and
 * 2 are shared: an offset-type record keeps the start/end positions there,
 * a delimited-type record keeps the delimiter/quote characters there.
 */
#define RECORDSIZE 9 /* number of values in the record */
#define RECORDTYPE 0 /* offset within a record to the record type */
#define FIELDSTART 1 /* start of field as offset into the line */
#define FIELDEND 2 /* end of field as offset into the line */
#define FIELDDELIM 1 /* delimiter character (shares the FIELDSTART slot) */
#define FIELDQUOTE 2 /* quote character (shares the FIELDEND slot) */
#define FIELDNUMERIC 3 /* numeric sort for this field? */
#define FIELDREV 4 /* sort this field in reverse order? */
#define FIELDFOLD 5 /* fold (case insensitive) sort for this field? */
#define FIELDDIR 6 /* directory sort for this field? */
#define FIELDOFFSET 7 /* offset of field (counting from left to right) */
#define FIELDESC 8 /* escape character typically \ */

/* values stored in the RECORDTYPE slot */
#define RECORDTYPEOFFSET 0 /* 0 if we are considering offsets into the line */
#define RECORDTYPEDELIM 1 /* 1 if we are considering parsing delimited type data */

#define RECORDNUM 100 /* maximum number of field records */

/* room to hold RECORDNUM field records; parenthesized so that the macro
 * keeps its value when it is used inside a larger expression */
#define MAXFIELDS (RECORDNUM * RECORDSIZE)
static int fieldinfo[MAXFIELDS]; /* array to hold field record data */
static int numfields = 0; /* number of fields for which options were read */
static int curfield = 0; /* field we are currently considering */

/* variables used to carry over default/last-set values between one field
 * record and the next. recordinit copies them into each new record;
 * use_record and the field dump functions overwrite them with the values
 * of the record being processed, which is how substr_delim receives them. */
static int recordtype = RECORDTYPEOFFSET; /* offsets within lines */
static int field_delim = ','; /* delimiter between fields */
static int field_quote = '"'; /* quote character */
static int field_esc = '\\'; /* for escaping field_delim, field_quote
           field_esc etc.*/
/* Sort input lines: reads lines of input data (STDIN), orders them
 * according to the field records built from the command line and sends
 * them to output (STDOUT). When field sampling was requested the parsed
 * fields are dumped instead and nothing is sorted. Run with --h to see help.
 *
 * Returns 0 on success, 1 when parse_args asks to stop or when the input
 * does not fit into the line storage.
 */

int main(int argc, char *argv[])
{
    int count; /* number of input lines read */

    recordinit(); /* the first field record always exists */
    if (parse_args(argc, argv) != 0)
        return 1;

    count = readlines(lineptr, MAXLINES);
    if (count < 0) {
        printf("input too big to sort\n");
        return 1;
    }

    if (!sample_field_parse) {
        my_qsort((void **) lineptr, 0, count - 1,
                (int (*)(const void*,const void*))compare);
        writelines(lineptr, count);
    } else if (dump_as_xml) {
        dump_parsed_fields_xml(lineptr, count);
    } else {
        dump_parsed_fields(lineptr, count);
    }
    return 0;
}
/* parse_args: create field records from command line arguments */
static int parse_args(int argc, char *argv[])
{
 int i = 0; /* index into each argument string */
 int j = 0; /* index into numstring */
 int no_room = 0; /* if 1 stop collecting field records */
 int temp = 0; /* temporary storage of numbers */
 char numstring[NUMDIGITS];/* space to store passed number as a string */
 program_name = &(*argv)[0];
 while (--argc > 0){
  if (no_room)
   break; /* stop collecting field records */
                ++argv; /* look at the next command line argument */
  i = 0;
  j = 0;
  if (((*argv)[i] == ':') || ((*argv)[i] == ',')){
   recordinit();
   if (numfields >= RECORDNUM)
    break; /* no room to store more field records */
   continue;
  }
  if ((*argv)[i] == '-'){
   while ((*argv)[i] != '\0'){ /* allow for mashed together options */
    if (no_room)
     break; /* stop collecting field records */
    i++;
    switch((*argv)[i]){
    case 'n':
     numeric = 1;
     fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDNUMERIC] = numeric; 
     break;
    case 'r':
     reverse = 1;
     fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDREV] = reverse;
     break;
    case 'f':
     fold = 1;
     fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDFOLD] = fold;
     break;
    case 'd':
     directory = 1;
     fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDDIR] = directory;
     break;
    case 's':
     i++;
     j = 0;
     while ((*argv)[i] != '\0' && isdigit((*argv)[i])){
      numstring[j++] = (*argv)[i++];
      if (j >= NUMDIGITS) /* do not read too many digits */
       break;
     }
     i--;
     numstring[j] = '\0';
     temp = atoi(numstring); /* offset starts at 0 for first char */
     if (temp >= MAXLEN){
      printf("ERROR: The value provided for the s option"
        " is too large\n");
      return 1;
     } else {
      fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDSTART] = temp;
     }
     break;
    case 'e':
     i++;
     j = 0;
     /* slight alteration to support passing a negative number */
     while ((*argv)[i] != '\0' && (isdigit((*argv)[i]) || (*argv)[i] == '-')){
      numstring[j++] = (*argv)[i++];
      if (j >= NUMDIGITS) /* do not read too many digits */
       break;
     }
     i--;
     numstring[j] = '\0';
     temp = atoi(numstring);
     if (temp >= MAXLEN){
      printf("ERROR: The value provided for the e option"
        " is too large\n");
      return 1;
     } else {
      fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDEND] = temp;
     }
     break;
    case 'o':
     i++;
     j = 0;
     while ((*argv)[i] != '\0' && isdigit((*argv)[i])){
      numstring[j++] = (*argv)[i++];
      if (j >= NUMDIGITS) /* do not read too many digits */
       break;
     }
     i--;
     numstring[j] = '\0';
     temp = atoi(numstring);
     if (temp >= MAXLEN){
      printf("ERROR: The value provided for the o option"
        " is too large\n");
      return 1;
     } else {
      fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDOFFSET] = temp;
     }
     break;
    case 't':
     i++;
     j = 0;
     while ((*argv)[i] != '\0' && isdigit((*argv)[i])){
      numstring[j++] = (*argv)[i++];
      if (j >= NUMDIGITS) /* do not read too many digits */
       break;
     }
     i--;
     numstring[j] = '\0';
     temp = atoi(numstring);
     if (temp != RECORDTYPEOFFSET && temp != RECORDTYPEDELIM){
      printf("ERROR: This is an ivalid value for the"
        " t option\n");
      return 1;
     } else {
      recordtype = temp;
      fieldinfo[((numfields - 1) * RECORDSIZE) + RECORDTYPE] = recordtype;
      if (recordtype == RECORDTYPEDELIM){
       if (fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDDELIM] == 0){
        fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDDELIM] = field_delim;
       }
       if (fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDQUOTE] <= 0){
        fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDQUOTE] = field_quote;
       }
       if (fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDESC] == 0){
        fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDESC] = field_esc;
       }
      }
     }
     break;
    case 'm':
     i++;
     if ((*argv)[i] == '1'){
      i++;
      if ((*argv)[i] != '\0'){
       if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'P'){
        field_delim = ' ';
        i++;
       } else if ((*argv)[i] == 'T' && (*argv)[i + 1] == 'A'){
        field_delim = '\t';
        i++;
       } else if ((*argv)[i] == 'V' && (*argv)[i + 1] == 'E'){
        field_delim = '|';
        i++;
       } else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'E'){
        field_delim = ';';
        i++;
       } else if ((*argv)[i] == 'B' && (*argv)[i + 1] == 'A'){
        field_delim = '\\';
        i++;
       } else if ((*argv)[i] == 'P' && (*argv)[i + 1] == 'E'){
        field_delim = '%';
        i++;
       } else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'O'){
        field_delim = '$';
        i++;
       } else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'Q'){
        field_delim = '\'';
        i++;
       } else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'Q'){
        field_delim = '"';
        i++;
       } else {
        field_delim = (*argv)[i];
       }
       fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDDELIM] = field_delim;
      } else {
       printf("ERROR: Please provide the field delimiter character"
         " immediately following the -m1 option\n");
       return 1;
      }
     } else if ((*argv)[i] == '2'){
      i++;
      if ((*argv)[i] != '\0'){
       if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'P'){
        field_quote = ' ';
        i++;
       } else if ((*argv)[i] == 'T' && (*argv)[i + 1] == 'A'){
        field_quote = '\t';
        i++;
       } else if ((*argv)[i] == 'V' && (*argv)[i + 1] == 'E'){
        field_quote = '|';
        i++;
       } else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'E'){
        field_quote = ';';
        i++;
       } else if ((*argv)[i] == 'B' && (*argv)[i + 1] == 'A'){
        field_quote = '\\';
        i++;
       } else if ((*argv)[i] == 'P' && (*argv)[i + 1] == 'E'){
        field_quote = '%';
        i++;
       } else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'O'){
        field_quote = '$';
        i++;
       } else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'Q'){
        field_quote = '\'';
        i++;
       } else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'Q'){
        field_quote = '"';
        i++;
       } else if ((*argv)[i] == 'N' && (*argv)[i + 1] == 'U'){
        field_quote = '\0';
        i++;
       } else {
        field_quote = (*argv)[i];
       }
       fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDQUOTE] = field_quote;
      } else {
       printf("ERROR: Please provide the field quote character"
         " immediately following the -m2 option\n");
       return 1;
      }
     } else if ((*argv)[i] == '3'){
      i++;
      if ((*argv)[i] != '\0'){
       if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'P'){
        field_esc = ' ';
        i++;
       } else if ((*argv)[i] == 'T' && (*argv)[i + 1] == 'A'){
        field_esc = '\t';
        i++;
       } else if ((*argv)[i] == 'V' && (*argv)[i + 1] == 'E'){
        field_esc = '|';
        i++;
       } else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'E'){
        field_esc = ';';
        i++;
       } else if ((*argv)[i] == 'B' && (*argv)[i + 1] == 'A'){
        field_esc = '\\';
        i++;
       } else if ((*argv)[i] == 'P' && (*argv)[i + 1] == 'E'){
        field_esc = '%';
        i++;
       } else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'O'){
        field_esc = '$';
        i++;
       } else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'Q'){
        field_esc = '\'';
        i++;
       } else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'Q'){
        field_esc = '"';
        i++;
       } else if ((*argv)[i] == 'N' && (*argv)[i + 1] == 'U'){
        field_esc = '\0';
        i++;
       } else {
        field_esc = (*argv)[i];
       }
       fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDESC] = field_esc;
      } else {
       printf("ERROR: Please provide the desired escape character"
         " immediately following the -m3 option\n");
       return 1;
      }
     }
     break;
    case 'i':
     i++;
     j = 0;
     while ((*argv)[i] != '\0' && isdigit((*argv)[i])){
      numstring[j++] = (*argv)[i++];
      if (j >= NUMDIGITS) /* do not read too many digits */
       break;
     }
     i--;
     numstring[j] = '\0';
     num_lines_to_ignore = atoi(numstring);
     break;
    case '-':
     i++;
     /* --help or --HELP or --h or --H */
     if ((*argv)[i] == 'h' || (*argv)[i] == 'H'){
      print_help();
      return 1; /* exit */
     }
     /* --dump_records or --DUMP_RECORDS or --d or --D etc. */
     if ((*argv)[i] == 'd' || (*argv)[i] == 'D'){
      dump_field_records();
      return 1; /* exit */
     }
     /* --sample_field_parse or --SAMPLE_FIELD_PARSE or --s or --S */
     if ((*argv)[i] == 's' || (*argv)[i] == 'S'){
      sample_field_parse = 1;
      break; /* in case we add more ifs below */
     }
     /* --xml or --XML or --x or --X */
     if ((*argv)[i] == 'x' || (*argv)[i] == 'X'){
      dump_as_xml = 1;
      break; /* in case we add more ifs below */
     }
     /* --include_line or --INCLUDE_LINE or --i or --I */
     if ((*argv)[i] == 'i' || (*argv)[i] == 'I'){
      include_line = 1;
      break; /* in case we add more ifs below */
     }
     break;
    case ':':
     recordinit();
     if (numfields >= RECORDNUM){
      no_room = 1; /* no room to store more field records */
     }
     break;
    case ',':
     recordinit();
     if (numfields >= RECORDNUM){
      no_room = 1; /* no room to store more field records */
     }
     break;
    default:
     break;
    }
   }
  }
 }
 return 0;
}

/* recordinit: append a new field record and fill it with defaults.
 * The record type and the escape character are taken from the carried-over
 * globals; a delimited record also takes the delimiter and quote character,
 * an offset record covers the line from position 0 with no end set (-1).
 * All sort flags and the field number start at 0. numfields is incremented.
 */
static void recordinit(void)
{
    int *rec = &fieldinfo[numfields * RECORDSIZE];

    rec[RECORDTYPE] = recordtype;
    if (!recordtype) {
        rec[FIELDSTART] = 0;
        rec[FIELDEND] = -1;
    } else {
        rec[FIELDDELIM] = field_delim;
        rec[FIELDQUOTE] = field_quote;
    }
    rec[FIELDNUMERIC] = 0;
    rec[FIELDREV] = 0;
    rec[FIELDFOLD] = 0;
    rec[FIELDDIR] = 0;
    rec[FIELDOFFSET] = 0;
    rec[FIELDESC] = field_esc;
    numfields++;
}

/* use_record: Setup to use the values from the specified
 * field record.
 */
static void use_record(const char *s1, const char *s2, int n)
{
 int fieldnum, p1, p2;
 /* set some sourcefile scoped global variables */
 recordtype = fieldinfo[(n * RECORDSIZE) + RECORDTYPE];
 numeric = fieldinfo[(n * RECORDSIZE) + FIELDNUMERIC];
 reverse = fieldinfo[(n * RECORDSIZE) + FIELDREV];
 fold = fieldinfo[(n * RECORDSIZE) + FIELDFOLD];
 directory = fieldinfo[(n * RECORDSIZE) + FIELDDIR];
 /* set type specific variables and initialize line1 and line2 */
 if (recordtype){
  field_delim = fieldinfo[(n * RECORDSIZE) + FIELDDELIM];
  field_quote = fieldinfo[(n * RECORDSIZE) + FIELDQUOTE];
  fieldnum = fieldinfo[(n * RECORDSIZE) + FIELDOFFSET];
  field_esc = fieldinfo[(n * RECORDSIZE) + FIELDESC];
  substr_delim(line1, s1, fieldnum);
  substr_delim(line2, s2, fieldnum);
 } else {
  p1 = fieldinfo[(n * RECORDSIZE) + FIELDSTART];
  p2 = fieldinfo[(n * RECORDSIZE) + FIELDEND];
  substr(line1, s1, p1, p2);
  substr(line2, s2, p1, p2);
 }
 //printf("line1 is %s line2 is %s\n", line1, line2);
 return;
}

/* my_qsort: sort v[left] ... v[right] into the order defined by comp,
 * which returns a negative value when its first argument belongs before
 * its second. The middle element is used as the pivot.
 */
static void my_qsort(void *v[], int left, int right,
        int (*comp)(const void*, const void*))
{
    int scan;
    int boundary = left; /* last element known to sort before the pivot */

    if (left >= right) /* fewer than two elements: nothing to do */
        return;

    swap(v, left, (left + right) / 2); /* park the pivot at v[left] */
    scan = left + 1;
    while (scan <= right) {
        if (comp(v[scan], v[left]) < 0) {
            boundary++;
            swap(v, boundary, scan);
        }
        scan++;
    }
    swap(v, left, boundary); /* pivot to its final position */
    my_qsort(v, left, boundary - 1, comp);
    my_qsort(v, boundary + 1, right, comp);
}
/* compare: compare two lines field record by field record.
 * Starting with record 0, each record is loaded with use_record and the
 * extracted fields are compared with numcmp or str_cmp, depending on the
 * record's numeric flag. The first record whose fields differ decides the
 * result; its reverse flag inverts the result to -1 / 1. Returns 0 when
 * the lines are equal in every record.
 */
static int compare(const char *s1, const char *s2)
{
    int order;

    curfield = 0;
    for (;;) {
        use_record(s1, s2, curfield);
        order = numeric ? numcmp(line1, line2) : str_cmp(line1, line2);
        if (order != 0)
            break; /* this record decides */
        if (++curfield >= numfields)
            break; /* equal in every record */
    }

    if (!reverse || order == 0)
        return order;
    return order > 0 ? -1 : 1;
}

/* numcmp: compare s1 and s2 by the numeric value atof gives them.
 * Returns -1, 0 or 1 when s1 is smaller than, equal to or larger than s2.
 */
static int numcmp(const char *s1, const char *s2)
{
    const double a = atof(s1);
    const double b = atof(s2);

    return (a > b) - (a < b);
}
/* skip_ignored: return s advanced past the characters that directory
 * order ignores, i.e. everything that is not a letter, a digit or white
 * space. Stops at the terminating '\0'.
 */
static const char *skip_ignored(const char *s)
{
    /* the casts keep the <ctype.h> calls defined for characters whose
     * value is negative when char is signed */
    while (*s != '\0' && !isalnum((unsigned char)*s) &&
            !isspace((unsigned char)*s))
        ++s;
    return s;
}

/* str_cmp: replaces standard library strcmp to add more types of
 * comparison. When fold is set the comparison is case insensitive; when
 * directory is set only letters, digits and white space are compared.
 *
 * Returns 0 when the strings are equal under these rules, otherwise the
 * difference of the first pair of characters that differ.
 */
static int str_cmp(const char *s1, const char *s2)
{
    for (;;) {
        int c1, c2;

        if (directory) {
            s1 = skip_ignored(s1);
            s2 = skip_ignored(s2);
        }
        c1 = fold ? tolower((unsigned char)*s1) : *s1;
        c2 = fold ? tolower((unsigned char)*s2) : *s2;
        if (c1 != c2)
            return c1 - c2;
        if (*s1 == '\0') /* both strings ended together */
            return 0;
        ++s1;
        ++s2;
    }
}
/* substr: copy characters from offset p1 to p2 (inclusive) of s2 to s1.
 *
 * s1 receives an empty string when p1 does not address a character of s2
 * (the field is missing from the line). When p2 is smaller than p1 the
 * field runs to the end of s2; a p2 beyond the end of s2 is treated the
 * same way. s1 must have room for the copied characters plus the '\0'.
 */
static void substr(char *s1, const char *s2, int p1, int p2)
{
    int length = (int)strlen(s2);
    int i;
    int j = 0;

    /* p1 == length - 1 is the last character of the line and therefore a
     * valid one character field; only offsets outside the line are missing */
    if (p1 < 0 || p1 >= length) {
        s1[0] = '\0';
        return;
    }
    if (p2 < p1)
        p2 = length;
    /* if p2 is too big we will simply set s1 to whatever is left */
    for (i = p1; i <= p2 && s2[i] != '\0'; i++)
        s1[j++] = s2[i];
    s1[j] = '\0';
}

/* substr_delim: copy characters from the delimited field number n of
 * s2 to s1 (the first field is number 0). s1 receives an empty string
 * when the line has no such field.
 *
 * field_delim is counted if it is outside of a pair of field_quote
 * (a quoted field), and not escaped with a field_esc. Otherwise
 * it is just copied as part of the data.
 *
 * If field_esc is followed by field_esc, field_delim, or field_quote
 * it is skipped and the following character is taken as part of the
 * data of the field it belongs to. Otherwise field_esc is just copied
 * as part of the data.
 *
 * field_quote only has special meaning if it is at the very beginning
 * of a field, or if it is at the very end of a field and follows a
 * field_quote that was at the very beginning of a field. If field_quote
 * is seen in the middle of the field it is just considered part of the
 * data. A field_quote of '\0' never matches, which disables quoting.
 *
 * This logic should allow substr_delim to work with data that has been
 * prepared using strict quoting and escaping rules, while allowing
 * the most flexibility for handling data that was not strictly quoted
 * and escaped.
 */
static void substr_delim(char *s1, const char *s2, int n)
{
    int delim_count = 0; /* number of the field we are inside of */
    int field_quote_on = 0; /* are we inside of quotes? */
    /* i, j, indexes into s2 and s1 */
    int i = 0;
    int j = 0;

    /* nothing after field n can contribute, so stop once it is passed */
    while (s2[i] != '\0' && delim_count <= n) {
        char next = s2[i + 1];

        if (i == 0 && s2[i] == field_quote) {
            field_quote_on = 1;
            ++i;
            continue;
        }
        /* next != '\0' keeps a '\0' quote character from matching the
         * string terminator, which would move i past the end of s2 */
        if (s2[i] == field_esc && next != '\0' &&
                (next == field_quote ||
                 next == field_delim ||
                 next == field_esc)) {
            /* drop the escape; keep the escaped character only when it
             * belongs to the requested field */
            if (delim_count == n)
                s1[j++] = next;
            i += 2;
            continue;
        }
        if (field_quote_on &&
                s2[i] == field_quote &&
                (next == field_delim || next == '\0')) {
            field_quote_on = 0;
            ++i;
            continue;
        }
        if (!field_quote_on && s2[i] == field_delim) {
            ++delim_count;
            ++i;
            /* an opening quote directly after the delimiter; the '\0'
             * test stops a '\0' quote character matching the terminator */
            if (s2[i] != '\0' && s2[i] == field_quote) {
                field_quote_on = 1;
                ++i;
            }
            continue;
        }
        if (delim_count == n)
            s1[j++] = s2[i];
        ++i;
    }
    s1[j] = '\0';
}

/* swap: exchange the pointers stored in v[i] and v[j].
 * The elements are void * so that the same routine serves arrays of any
 * object pointer type.
 */
static void swap(void *v[], int i, int j)
{
    void **a = &v[i];
    void **b = &v[j];
    void *held = *a;

    *a = *b;
    *b = held;
}

static char *alloc(int);
/* readlines: read input lines into storage obtained from alloc.
 *
 * The first num_lines_to_ignore lines are not stored; they are echoed
 * unchanged unless field sampling is active. Every other line is stored
 * without its trailing newline and a pointer to it is appended to lineptr.
 *
 * Returns the number of lines stored, or -1 when more than maxlines lines
 * were read or the line storage is exhausted.
 */
static int readlines(char *lineptr[], int maxlines)
{
    int len;
    int nlines = 0;
    char *p, line[MAXLEN];

    while ((len = getLine(line, MAXLEN)) > 0) {
        if (num_lines_to_ignore) {
            if (!sample_field_parse) /* if we are sampling fields we may not
                                        want the ignored lines output */
                printf("%s", line);
            --num_lines_to_ignore;
            continue;
        }
        /* Only a newline may be removed: the last line of the input, or a
         * line that getLine had to cut at MAXLEN - 1 characters, ends in
         * real data that must be kept. */
        if (line[len - 1] == '\n')
            line[--len] = '\0';
        if (nlines >= maxlines || (p = alloc(len + 1)) == NULL)
            return -1;
        strcpy(p, line);
        lineptr[nlines++] = p;
    }
    return nlines;
}

/* writelines: print the first nlines strings of lineptr, one per line */
static void writelines(char *lineptr[], int nlines)
{
    int k;

    for (k = 0; k < nlines; k++)
        printf("%s\n", lineptr[k]);
}

/* getLine: read a line from standard input into s, return its length.
 *
 * At most lim - 1 characters are stored, followed by '\0'. The newline
 * is stored as part of the line when it was read before the limit was
 * reached. Returns 0 at end of input; nothing is read or stored when lim
 * is not positive.
 */
static int getLine(char *s, int lim)
{
    int c = EOF; /* defined even when the loop below never reads */
    int i = 0;

    if (lim <= 0) /* no room for even the terminator */
        return 0;
    while (i < lim - 1 && (c = getchar()) != EOF && c != '\n')
        s[i++] = c;
    if (c == '\n')
        s[i++] = c;
    s[i] = '\0';
    return i;
}

static char allocbuf[ALLOCSIZE]; /* storage handed out by alloc */
static char *allocp = allocbuf; /* first byte not handed out yet */
/* alloc: return a pointer to n characters taken from allocbuf, or NULL
 * when fewer than n characters are left. Storage is never given back. */
static char *alloc(int n)
{
    char *start;

    if (allocbuf + ALLOCSIZE - allocp < n)
        return NULL; /* not enough room left */
    start = allocp;
    allocp += n;
    return start;
}


/* dump_parsed_fields: Apply the field position/offset/delimiters
 * defined and dump the resulting field contents, one field per
 * line, repeat for all input lines. Useful for debugging how
 * the field position and contents are identified for the sort
 */
static void dump_parsed_fields(char *lineptr[], int nlines)
{
 int n = 0;
 int i = 1;
 int fieldnum, p1, p2;
 while (i <= nlines){
  if (include_line)
   printf("%s\n", *lineptr);
  n = 0;
  do{
   recordtype = fieldinfo[(n * RECORDSIZE) + RECORDTYPE];
   if (recordtype){
    field_delim = fieldinfo[(n * RECORDSIZE) + FIELDDELIM];
    field_quote = fieldinfo[(n * RECORDSIZE) + FIELDQUOTE];
    fieldnum = fieldinfo[(n * RECORDSIZE) + FIELDOFFSET];
    field_esc = fieldinfo[(n * RECORDSIZE) + FIELDESC];
    substr_delim(line1, *lineptr, fieldnum);
   } else {
    p1 = fieldinfo[(n * RECORDSIZE) + FIELDSTART];
    p2 = fieldinfo[(n * RECORDSIZE) + FIELDEND];
    substr(line1, *lineptr, p1, p2);
   }
   printf("%s\n", line1);/* output the parsed field */
   n++;
  } while (n < numfields);
  lineptr++;
  i++;
 }
 return;
}
/* dump_parsed_fields_xml: Apply the field position/offset/delimiters
 * defined and dump the resulting field contents, in an xml format.
 */
static void dump_parsed_fields_xml(char *lineptr[], int nlines)
{
 int n = 0;
 int i = 1;
 int fieldnum, p1, p2;
 printf("<root>\n");
 while (i <= nlines){
  printf("\t<line%d>", i);
  n = 0;
  do{
   recordtype = fieldinfo[(n * RECORDSIZE) + RECORDTYPE];
   if (recordtype){
    field_delim = fieldinfo[(n * RECORDSIZE) + FIELDDELIM];
    field_quote = fieldinfo[(n * RECORDSIZE) + FIELDQUOTE];
    fieldnum = fieldinfo[(n * RECORDSIZE) + FIELDOFFSET];
    field_esc = fieldinfo[(n * RECORDSIZE) + FIELDESC];
    substr_delim(line1, *lineptr, fieldnum);
   } else {
    p1 = fieldinfo[(n * RECORDSIZE) + FIELDSTART];
    p2 = fieldinfo[(n * RECORDSIZE) + FIELDEND];
    substr(line1, *lineptr, p1, p2);
   }
   printf("<f%d>%s</f%d>", n, line1, n);/* output the parsed field */
   n++;
  } while (n < numfields);
  printf("</line%d>\n", i);
  lineptr++;
  i++;
 }
 printf("</root>\n");
 return;
}

/* dump_field_records: for debugging purposes you may wish to
 * dump all the field records that have been stored
 */
static void dump_field_records(void)
{
 int i;
 int c;
 printf("\n---------------------------| Field Record Dump |-----------------------------\n");
 printf("Record | COL0 | COL1 | COL2 | COL3 | COL4 | COL5 | COL6 | COL7 | COL8\n");
 for (i = 0; i < numfields; i++){
  printf(" %d\t", i);
  printf("| %d\t", fieldinfo[(i * RECORDSIZE) + RECORDTYPE]);
  if (fieldinfo[(i * RECORDSIZE) + RECORDTYPE] == RECORDTYPEOFFSET){
   printf("| %d\t", fieldinfo[(i * RECORDSIZE) + FIELDSTART]);
   printf("| %d\t", fieldinfo[(i * RECORDSIZE) + FIELDEND]);
  } else {
   c = fieldinfo[(i * RECORDSIZE) + FIELDDELIM];
   if (c == '\t'){
    printf("| TAB\t");
   } else if (c == ' '){
    printf("| SPACE\t");
   } else {
    printf("| %c\t", c);
   }
   c = fieldinfo[(i * RECORDSIZE) + FIELDQUOTE];
   if (c == '\t'){
    printf("| TAB\t");
   } else if (c == ' '){
    printf("| SPACE\t");
   } else if (c == '\0'){
    printf("| NULL\t");
   } else {
    printf("| %c\t", c);
   }
  }
  printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDNUMERIC]);
  printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDREV]);
  printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDFOLD]);
  printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDDIR]);
  printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDOFFSET]);
  c = fieldinfo[(i * RECORDSIZE) + FIELDESC];
  if (c == '\t'){
   printf("| TAB\t");
  } else if (c == ' '){
   printf("| SPACE\t");
  } else if (c == '\0'){
   printf("| NULL\t");
  } else {
   printf("| %c\t", c);
  }  
 }
 printf("\nWhere COL0 = RECORDTYPE, COL1 = FIELDSTART or FIELDDELIM\n");
 printf("COL2 = FIELDEND or FIELDQUOTE, COL3 = FIELDNUMERIC, COL4 = FIELDREV\n");
 printf("COL5 = FIELDFOLD, COL6 = FIELDDIR, COL7 = FIELDOFFSET, COL8 = FIELDESC\n\n"); 
 printf("RECORDTYPE is set by -t\n");
 printf("FIELDSTART is set by -s\n");
 printf("FIELDDELIM is set by -m1\n");
 printf("FIELDEND is set by -e\n");
 printf("FIELDQUOTE is set by -m2\n");
 printf("FIELDNUMERIC is set by -n\n");
 printf("FIELDREV is set by -r\n");
 printf("FIELDFOLD is set by -f\n");
 printf("FIELDDIR is set by -d\n");
 printf("FIELDOFFSET is set by -o\n");
 printf("FIELDESC is set by -m3\n");
 printf("For more info use the --h (help) option.\n\n");
 return;
}

static void print_help(void)
{
 printf("Program: %s\n", program_name);
 printf("Author: %s\n", AUTHOR);
 printf("Creation Date: %s\n", CREATION_DATE);
 printf("Last Update: %s\n", LAST_UPDATE);
 printf("usage: cat sourcefile | ./sort -options [> outputfile]\n\n");
 printf("This sort program expects input from STDIN (output from cat for\n");
 printf("example) and sends output to STDOUT (the screen for example).\n\n");
 printf("If you fail to provide input from cat or similar you will be in\n");
 printf("an interactive input mode. This means you can enter lines using\n");
 printf("the keyboard and press CTRL-D on an empty line to process them.\n\n");
 printf("Pressing CTRL-C will abort the program.\n\n");
 printf("%s is quite sophisticated, permitting you to break a line\n", program_name);
 printf("into fields that can have separate sorting options applied to\n");
 printf("them.\n\n");
 printf("It is always lines that are sorted, not the fields within the line.\n");
 printf("However defining fields and specifying sort options for them permits\n");
 printf("sophisticated sorting behavior for the lines.\n\n");
 printf("You do not need to provide sorting options for every field on a line.\n");
 printf("Only specify those fields that have the data that you wish to use\n");
 printf("to sort the lines.\n\n");
 printf("The order that you specify the fields on the command line is the order\n");
 printf("of precedence for sorting.\n\n");
 printf("For example...\n");
 printf("if one field has a username and another has a date, you can sort\n");
 printf("by increasing (ascending) username and then by decreasing (descending)\n");
 printf("date simply by specifying those 2 fields and their sort options on the\n");
 printf("command line. Specify the username field first and the date field\n");
 printf("next. The way the logic works is only if there are equal values\n");
 printf("found in the first field, is the second field examined, and only if\n");
 printf("there are equal values found in the second field is the third field\n");
 printf("examined, and so on until either a difference is found between the\n");
 printf("two fields in question, or we run out of fields that we have defined\n");
 printf("for sorting. If we run out of fields that we have defined for sorting\n");
 printf("and no difference has been found, the lines are considered equal.\n\n");
 printf("This sort program supports the use of up to %d fields with unique\n", RECORDNUM);
 printf("sort options permitted for each field. The data can have any number of\n");
 printf("fields, but you can only specify sort options for %d fields.\n", RECORDNUM);
 printf("Actually the data is limited in fields per line by the setting\n");
 printf("for the maximum line length of %d characters and the size of the fields.\n\n",MAXLEN);
 printf("The field definitions (size/location) along with the sort options\n");
 printf("are saved internally in records. There is a handy option that\n");
 printf("permits dumping these internal records so that you can evaluate\n");
 printf("how the program has interpreted the command line options that you\n");
 printf("have provided. Place this option --d (--dump_records) after all\n");
 printf("the options that you desire to audit have been specified on the command\n");
 printf("line.\n\n");
 printf("--d dump field records, place after other options on the command line.\n");
 printf(" If you use this option the sort is not performed, this option is used\n");
 printf(" strictly for debugging your sort options that you have defined\n\n");
 printf("%s allows you to specify fields using character offsets, from the\n", program_name);
 printf("start of the line at position 0, or using delimiters and quote characters.\n");
 printf("Each field definition can optionally use either method.\n");
 printf("-t# type of field definition, where # is %d for the character offset method\n", RECORDTYPEOFFSET);
 printf(" and %d for the delimited field method. This option should be listed\n", RECORDTYPEDELIM);
 printf(" first in the field definition, but inherits from left to right so\n");
 printf(" if all field definitions are of the same type it only needs to be\n");
 printf(" specified for the first field for example. The default is the\n");
 printf(" character offset method\n");
 printf("-s# starting character position, where # is a number that is less than\n");
 printf(" the maximum line length of %d\n", MAXLEN);
 printf("-e# ending character position, where # is a number that is less than\n");
 printf(" the maximum line length of %d\n", MAXLEN);
 printf("If you choose to use the character offset method of defining a field you must\n");
 printf("set -s and -e for each field to the correct offsets, sort will not check to\n");
 printf("see that you did.\n\n");
 printf("If you set -e to less than -s, it means that you want from -s to the end of\n");
 printf("the line. The default setting for sort is to use the character offset method\n");
 printf("and -s is set to 0 (the beginning) and -e is set to -1 (the rest of the line).\n");
 printf("Because of the special meaning of -e set to be less than -s, -e supports\n");
 printf("passing a negative number -e-1 for -1.\n\n");
 printf("The default delimiters for delimited data is to use a comma ',' to separate\n");
 printf("fields and to use double quotes '\"', to quote fields. The quotes surround the\n");
 printf("fields to indicate that any commas that are found within the fields can be\n");
 printf("ignored.\n\n");
 printf("In order to support the possibility of a double quote found within the data\n");
 printf("an escape character can be used and the default escape character is a\n");
 printf("backslash '\\'.\n\n");
 printf("In actuality, this program is coded to be more flexible than that. An escape\n");
 printf("character 'can' be used to escape the delimiter, the quote character or an\n");
 printf("escape character, either inside or outside of a quoted field. If the escape\n");
 printf("character is found in front of anything else it is considered part of the data.\n\n");
 printf("If the quote character is found anywhere besides the start or end of a field it\n");
 printf("is considered part of the data and as such, technically, does not need to be\n");
 printf("escaped. So this program should be able to handle data formatted according to a\n");
 printf("variety of specifications.\n\n");
 printf("If this is not enough, the quote and/or the escape character can also be\n");
 printf("disabled in cases where they are not required and yet may be found in the data\n");
 printf("(see the NU code below).\n\n");
 printf("You can define what characters to use to separate fields, to quote fields,\n");
 printf("and to escape quote characters.\n\n");
 printf("-m1n field separator, where n is the desired character\n");
 printf("-m2n quote character, where n is the desired character\n");
 printf("-m3n escape character, where n is the desired escape character\n\n");
 printf("In the above 3 options instead of specifying the literal character desired\n");
 printf("as n you can use the following 2 letter (uppercase) codes:\n\n");
 printf("SP to mean a SPACE\n");
 printf("TA to mean a TAB\n");
 printf("VE to mean a VERTICAL BAR '|'\n");
 printf("SE to mean a SEMICOLON ';'\n");
 printf("BA to mean a BACKSLASH '\\'\n");
 printf("PE to mean a PERCENT SIGN '%%'\n");
 printf("DO to mean a DOLLAR SIGN '$'\n");
 printf("SQ to mean a SINGLE QUOTE '\n");
 printf("DQ to mean a DOUBLE QUOTE \"\n\n");
 printf("NU to mean NULL '\\0', is supported for the quote or escape character.\n");
 printf(" Since such a character will not be seen in the data (it is the string\n");
 printf(" terminator) it is used to disable the operation of the quote or escape\n");
 printf(" character if that is ever desired.\n\n");
 printf("When using the delimited field method to specify fields it is important to\n");
 printf("indicate which field in the data we are referring to. Counting from 0 for the\n");
 printf("leftmost field on a line you can indicate field numbers using the -o option.\n");
 printf("-o# indicate which delimited field, where # is a number from 0 to however\n");
 printf(" many fields exist in the data\n\n");
 printf("Every new field record initializes -o to be 0, referring to the first field, if\n");
 printf("this is not the field you want you must set the -o option to the correct field\n");
 printf("number.\n\n");
 printf("Using either method of specifying a field, the character offset method or the\n");
 printf("delimited method, if a specified field does not exist in the line in question\n");
 printf("it is treated as an empty field. If a large number of lines are missing this\n");
 printf("field and it is the only sort field that you indicated the sort will be slow.\n");
 printf("Qsort does not like it when too many lines evaluate to be equal.\n\n");
 printf("Between field definitions, to indicate the start of a new field, you can use\n");
 printf("either a colon ':' or a comma ','. Be careful not to place a field separator at\n");
 printf("the beginning as by default it will indicate to use the entire line with the\n");
 printf("default sorting options as the first field for sorting.\n\n");
 printf("The sorting options are:\n\n");
 printf("-n numeric sort, puts numbers in order of value\n");
 printf("-r reverse the sort order, instead of increasing order it would be\n");
 printf(" decreasing order\n");
 printf("-f fold upper and lower case together, or in other words do a case\n");
 printf(" insensitive sort\n");
 printf("-d directory sort, this ignores any character that is not a letter,\n");
 printf(" number or space\n\n");
 printf("The default sort options are set to have all of these options off, which\n");
 printf("means punctuation characters, or other special characters have a sorting\n");
 printf("value, the sort is in increasing order, upper and lower case letters\n");
 printf("have different sorting values, 17 would be considered lower than 2\n");
 printf("(numeric value is not considered).\n\n");
 printf("Options can be specified individually on the command-line separated by\n");
 printf("spaces...\n");
 printf("cat sourcefile | ./sort -t1 -o1 -n , -o0 -f -d\n\n");
 printf("or they can be mashed together...\n");
 printf("cat sourcefile | ./sort -t1o1n,o0fd\n\n");
 printf("However do not put spaces between an option and its value. -s 99 is not\n");
 printf("an accepted parameter, the option should be indicated as -s99. Also the\n");
 printf("use of an equals sign '=' is not supported between an option and its\n");
 printf("value.\n\n");
 printf("Do not confuse the use of the -s (start) and -e (end) options together\n");
 printf("with the delimited field option. It will replace whatever is being used\n");
 printf("for the delimiter and quote character with whatever character happens to\n");
 printf("equal the numeric value you provide. If you really know what you are\n");
 printf("doing it can be useful, otherwise avoid it.\n\n");
 printf("There is an option to ignore the first x number of lines. You might use\n");
 printf("this option if your data includes a header line with column titles for\n");
 printf("example and you do not want this line sorted in with the data.\n");
 printf("-i# ignore the first # of lines, where # is a number between 0 and 9999.\n");
 printf(" 0 in this case has no logical meaning since we start counting lines\n");
 printf(" with 1 as the first line.\n\n");
 printf("A Sorting Example:\n");
 printf("cat sourcefile | ./sort -t1o1n,o0fd\n\n");
 printf("Means use the delimited field method (-t1), the primary sort field is the\n");
 printf("second field (o1), do a numerical sort on that field (n), if two identical\n");
 printf("numbers are found in the second field, look at the first field (o0), doing a\n");
 printf("case insensitive directory sort of the first field, decide the order for\n");
 printf("the two lines. The default delimiter, quote character, and escape character\n");
 printf("is used.\n\n");
 printf("If the sort appears to be excessively slow, it could be because the fields and\n");
 printf("options that you have selected result in too many lines that would have an\n");
 printf("equal sort order. I think this is a limitation of the qsort logic since\n");
 printf("specifying more fields and more options, so that the lines which evaluate\n");
 printf("to be equal are reduced, significantly speeds up the sort.\n\n");
 printf("A few additional options have been added to aid in debugging the\n");
 printf("specification of fields. You may find other uses for them as well. The option\n\n");
 printf("--s for sample field parsing\n\n");
 printf("found anywhere in the options will signify that instead of sorting the input\n");
 printf("you want to output the data from the fields that you have indicated. This\n");
 printf("will assist you in debugging delimiters and offset etc. when specifying\n");
 printf("fields for delimited data, and identifying errors in character offset\n");
 printf("specification when using the character offset method of identifying fields.\n");
 printf("It can be most useful when analyzing one field specifier at a time as the\n");
 printf("fields will be dumped one field per line so as not to introduce new field\n");
 printf("separators to confuse the issue when identifying what data was exactly\n");
 printf("pulled.\n\n");
 printf("There is a further specifier that can be added to also dump the source line\n");
 printf("that the fields were pulled from\n\n");
 printf("--i for include line with the field parse dump\n\n");
 printf("For assisting with debugging what is parsed when multiple fields are specified\n");
 printf("there is an option to format the fields into an xml document. No effort is made\n");
 printf("to escape invalid characters found in the data, the data is left as is.\n\n");
 printf("--x format parsed fields into an xml document format\n\n");
 printf("If you specify to use the xml format, the --i option is ignored, there is no\n");
 printf("option to include the original line in the xml format. The field numbering\n");
 printf("used in the xml output is not the number of the field with respect to the\n");
 printf("data but the number of the field record specified on the command line, staring\n");
 printf("with 0 as the leftmost field record specified\n\n");
 printf("Unless --s is specified --x and --i will be ignored.\n\n");
 return;
}

'The C Programming Language' 카테고리의 다른 글

Chapter 6 - Structures 3  (0) 2017.12.22
Chapter 6 - Structures 1  (0) 2017.12.22
Chapter 5 - Pointers and Arrays 13  (0) 2017.12.22
Chapter 5 - Pointers and Arrays 12  (0) 2017.12.22
Chapter 5 - Pointers and Arrays 11  (0) 2009.03.27