/* cut.c
   This program cuts out selected portions of each line of a file
   Usage: cut [-c<range>] [-f<range>] [-d<delimiter>] [-s] [-w] [file [... filen]]
   Options:
   -c cut by column range
   -f cut by field range
   -d use field delimiter
   -s suppress lines with no field delimiters
   -w use whitespace (spaces and tabs) for field delimiters
   Multiple ranges can be specified with commas.
   A range always includes a dash, optionally preceded with a number and likewise optionally followed by a number.
   A range beginning with dash (-) implicitly means start with the first column.
   A range without an ending number means all characters through the end of line.
   change log:
   08/18/2023 initial version
   08/18/2023 fixed reading default stdin (rather than stdout)
   08/18/2023 corrected line suppression logic (-s)
   08/20/2023 corrected field handling so that every delimiter separates fields, rather than skipping empty fields
              as strtok does; strsep was added to handle this
   08/21/2023 replace use of fprintf with dylib.fputs to conserve memory
   12/10/2023 removed reference to string_ext.h
   02/27/2024 added use of dylib
   03/01/2024 modified to send strings to dylib.fputs rather than literals
   01/20/2025 moved fputs to dylib
   06/15/2025 updated exit references
*/

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <stdio.h>
#include <dylib.h>

// usage text; kept in named variables so that dylib.fputs is handed strings
// rather than literals
const char *msg1 = "usage: cut -c list [file ...]\n";
const char *msg2 = "       cut -f list [-s] [-w | -d delim] [file ...]\n";

// prints the usage statement
// The text goes to stderr: it is only shown when the command line is rejected,
// so it must not be mixed into the data stream written to stdout.
void print_usage (void) {
   dylib.fputs (msg1, stderr);
   dylib.fputs (msg2, stderr);
}

// maximum length of an input string
#define MAX_INPUT_STR 256

// the mask type
// v[n] is true when position n (column or field) has been selected. Positions
// given on the command line are used directly as indices, so valid indices are
// 0 .. MAX_INPUT_STR - 1.
typedef struct {
   bool v[MAX_INPUT_STR];
} mask_t;

// converts the text in [s, end) to an integer
// Follows the atoi conventions (leading whitespace is skipped, an optional sign
// is accepted, scanning stops at the first non-digit, no digits yields 0) but
// never reads at or beyond end, and the magnitude saturates once it reaches
// MAX_INPUT_STR so an arbitrarily long run of digits cannot overflow.
static int parse_position (const char *s, const char *end) {
   bool negative = false;
   int value = 0;

   // skip leading whitespace
   while ((s < end) && ((*s == ' ') || (*s == '\t') || (*s == '\n') ||
                        (*s == '\v') || (*s == '\f') || (*s == '\r'))) {
      s++;
   }
   // optional sign
   if ((s < end) && ((*s == '+') || (*s == '-'))) {
      negative = (*s == '-');
      s++;
   }
   // accumulate digits; stop growing once the value is already past the mask
   while ((s < end) && (*s >= '0') && (*s <= '9')) {
      if (value < MAX_INPUT_STR) {
         value = value * 10 + (*s - '0');
      }
      s++;
   }
   return negative ? -value : value;
}

// calculates the column and field mask ranges
// mask  - receives the result; every entry is cleared before the list is applied
// range - comma separated list of entries, each either a single position (n) or
//         a dash range (n-m, -m, n-). A missing or zero end selects through the
//         last mask position; a missing start begins at index 0. Empty entries
//         (consecutive commas) are ignored.
// Positions beyond the end of the mask are clamped, so no entry can write
// outside mask->v. The list is parsed in place, so its length is not limited.
// returns true if an argument is NULL, false otherwise.
bool calculate_mask (mask_t *mask, const char *range) {
   int i;

   if (!mask || !range) {
      return true;
   }

   // initialize the entire mask to false
   for (i = 0; i < MAX_INPUT_STR; i++) {
      mask->v[i] = false;
   }

   const char *p = range;
   // loop through all entries
   while (*p) {
      // skip entry separators
      if (*p == ',') {
         p++;
         continue;
      }

      // find the end of this entry and the first dash inside it, if any
      const char *end = p;
      const char *dash = NULL;
      while (*end && (*end != ',')) {
         if ((*end == '-') && !dash) {
            dash = end;
         }
         end++;
      }

      int bi, ei;
      if (dash) {
         bi = parse_position (p, dash);         // begin index; an open started range (-n) yields 0
         ei = parse_position (dash + 1, end);   // end index
         if (!ei) {                             // handle open ended range (n-)
            ei = MAX_INPUT_STR - 1;
         }
      } else {
         bi = parse_position (p, end);          // single position
         ei = bi;
      }

      // keep the range inside the mask
      if (bi < 0) {
         bi = 0;
      }
      if (ei > MAX_INPUT_STR - 1) {
         ei = MAX_INPUT_STR - 1;
      }

      // set the mask true for the range
      for (i = bi; i <= ei; i++) {
         mask->v[i] = true;
      }

      p = end;
   }
   return false;
}

// processes one file
const char *cr = "\n";

// Reads f line by line and writes the selected portion of each line to stdout.
// f                        - the open input stream
// is_column_mode           - emit the characters whose 1-based column is set in mask
// is_field_mode            - emit the fields whose 1-based number is set in mask
//                            (only consulted when is_column_mode is false)
// mask                     - the selection mask; position n is selected when mask->v[n] is true
// sep                      - string holding the field delimiter characters; the first one
//                            is written between the fields that are output
// suppress_lines_wo_delims - in field mode, drop lines that contain no delimiter;
//                            otherwise such lines are written out unchanged
// Every delimiter separates two fields, so adjacent delimiters produce empty fields.
// Input is read into a buffer of MAX_INPUT_STR bytes; assuming dylib.fgets follows
// the standard fgets contract, a longer line is handled as several separate lines.
void process_file (FILE *f, bool is_column_mode, bool is_field_mode, mask_t *mask, char *sep, bool suppress_lines_wo_delims) {
   char s[MAX_INPUT_STR];   // the input string
   char *p;                 // pointer to a field
   char *k;                 // pointer to separator
   char *rest;              // unparsed remainder of the line, advanced by strsep
   bool s_has_sep;          // indicates whether there is a separator in s
   int j;                   // loop var
   int len;                 // length of the current line
   bool first;              // indicates whether the current output is the first on a line

   // read all lines of the file
   while (dylib.fgets (s, sizeof (s), f)) {
      // remove CR/LF
      s[dylib.strcspn (s, "\r\n")] = 0;
      // process column mode
      if (is_column_mode) {
         // measure once; the line does not change while it is printed
         len = (int) dylib.strlen (s);
         // loop through the entire string
         for (j = 0; j < len; j++) {
            // print the character if the mask is true
            if (mask->v[j + 1]) {
               fputc (s[j], stdout);
            }
         }
         // write a line feed
         fputc ('\n', stdout);

      // process field mode
      } else if (is_field_mode) {
         // assess if there is a delimiter on this line
         s_has_sep = false;
         for (k = sep; (*k != 0x00) && (!s_has_sep); k++) {
            s_has_sep = (dylib.strchr (s, *k) != NULL);
         }

         if (s_has_sep) {
            rest = s;
            // initialize to the first field
            j = 0;
            // initialize that this is the first field to be printed
            first = true;
            // loop through all tokens
            while ((p = strsep (&rest, sep)) != NULL) {
               // print the field if the mask is true; a line made only of delimiters
               // can hold MAX_INPUT_STR fields, so stay inside the mask
               if ((j + 1 < MAX_INPUT_STR) && mask->v[j + 1]) {
                  // if the first field to be printed, unset, or print a separator
                  if (first) {
                     first = false;
                  } else {
                     fputc (*sep, stdout);
                  }
                  // print the field
                  dylib.fputs (p, stdout);
               }
               // increment the field counter
               j++;
            }
            // write a line feed
            fputc ('\n', stdout);
         } else if (!suppress_lines_wo_delims) {
            // no delimiter on this line: pass it through intact
            dylib.fputs (s, stdout);
            dylib.fputs (cr, stdout);
         }
      }
   }
}

// the cut program
const char *msg_bad_delim   = "cut: bad delimiter\n";
const char *msg_cut_name    = "cut: ";
const char *msg_no_file_dir = ": no such file or directory\n";

// Parses the options, validates that exactly one of column mode (-c) or field
// mode (-f) was requested and that the field-only options (-d, -s, -w) were not
// combined with -c, then processes every named file, or stdin when no file is
// named.
// Exits with EXIT_FAILURE when the command line is rejected. Otherwise returns
// EXIT_SUCCESS, or EXIT_FAILURE if at least one named file could not be opened
// (the remaining files are still processed).
int main (int argc, char *argv[]) {

   bool is_column_mode = false;           // indicates if in column mode
   bool is_field_mode  = false;           // indicates if in field mode
   mask_t mask;                           // the column and field mask
   char sep[4];                           // the field delimiters
   bool suppress_lines_wo_delims = false; // suppress lines without delimiters when in field mode
   bool error          = false;           // indicates if an error was generated in options processing
   bool show_usage     = false;           // indicates if the usage statement must be printed
   bool field_opts_set = false;           // indicates if a field-only option was given
   int status          = EXIT_SUCCESS;    // exit status once file processing has started

   dylib.strcpy (sep, "\t");              // initialize the field separator to the default

   // capture the options
   int opt;
   while ((opt = getopt (argc, argv, "c:f:d:sw")) != -1) { // loop through all options
      switch (opt) {                                       // switch on an option
         case 'c':                                         // turn on column mode
            is_column_mode = true;
            show_usage |= calculate_mask (&mask, optarg);
            break;
         case 'f':                                         // turn on field mode
            is_field_mode = true;
            show_usage |= calculate_mask (&mask, optarg);
            break;
         case 'd':                                         // capture specified field delimiter
            if (dylib.strlen (optarg) != 1) {              // ensure exactly one character was found
               dylib.fputs (msg_bad_delim, stderr);
               error = true;
            } else {
               dylib.strcpy (sep, optarg);
               field_opts_set = true;
            }
            break;
         case 'w':                                         // set whitespace as the field delimiters
            dylib.strcpy (sep, "\t ");
            field_opts_set = true;
            break;
         case 's':
            suppress_lines_wo_delims = true;
            field_opts_set = true;
            break;
         case '?':                                         // unknown option or missing option argument
         default:
            show_usage = true;
            break;
      }
   }

   // ensure options have placed the mode into column or field
   if ((is_column_mode && is_field_mode) ||
       (!is_column_mode && !is_field_mode) ||
       (is_column_mode && field_opts_set)) {
      show_usage = true;
   }

   // print the usage statement once, whatever the number of problems found
   if (show_usage) {
      print_usage ();
      error = true;
   }

   // exit if there was an error while processing options
   if (error) {
      exit (EXIT_FAILURE);
   }

   // adjust argc and argv removing the processed options
   argc -= optind;
   argv += optind;

   if (argc) {
      // loop through all files
      for (int i = 0; i < argc; i++) {
         // open the file
         FILE *f = dylib.fopen (argv[i], "r");
         // confirm the file is open
         if (f) {
            process_file (f, is_column_mode, is_field_mode, &mask, sep, suppress_lines_wo_delims);
            dylib.fclose (f);
         } else {
            // file failed to open: report it, remember the failure, keep going
            dylib.fputs (msg_cut_name, stderr);
            dylib.fputs (argv[i], stderr);
            dylib.fputs (msg_no_file_dir, stderr);
            status = EXIT_FAILURE;
         }
      }
   } else {
      process_file (stdin, is_column_mode, is_field_mode, &mask, sep, suppress_lines_wo_delims);
   }

   return status;
}
