/*
 Selects everything but the specified columns from an rdbtable.

 Author: Carlo Strozzi <carlos@linux.it>
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define MAX_ESCAPE_LENGTH       256
#define MAX_ARG_LENGTH          1024
#define EMPTY                   ""

/*
 * Print the usage/help text for this operator on STDOUT.
 *
 * my_name: program name; it is substituted into the text three times.
 *
 * The text is built from adjacent string literals with explicit "\n"
 * escapes: a literal that spans several source lines is not valid C.
 * The bytes written are the same as before.
 */
void show_help( char *my_name)
{
    printf(
        "\n"
        "        NoSQL operator: %s\n"
        "\n"
        "Usage:  %s [options] 'column [ column ... ]'\n"
        "\n"
        "Options:\n"
        "    -help    Print this help info.\n"
        "    -n       Strip header from output.\n"
        "    -x       Debug option.\n"
        "    -e \"abc\" Escape characters in set [a,b,c] by prepending a\n"
        "             backslash to each of them on STDOUT.\n"
        "\n"
        "Takes a list of column names that are NOT to be selected and prints\n"
        "the others to STDOUT.  Chars that are special to the UNIX shell must\n"
        "be quoted.\n"
        "\n"
        "Column names are in the form 'column_1 column_2 ...'.\n"
        "For example, to select everything except columns 'NAME' and 'JOB' from\n"
        "the input rdbtable the statement is:\n"
        "\n"
        "                        'NAME  JOB' \n"
        "\n"
        "Note how the list of columns must be quoted, i.e. it must be one\n"
        "single token. Long lists of column names may be folded over multiple\n"
        "lines by ending each line with a backslash, i.e.:\n"
        "\n"
        /* The %s below receives a single backslash. */
        "             'COL1 COL2 COL3 COL4 COL5 COL6 COL7 %s\n"
        "              COL8 COL9 COL10'\n"
        "\n"
        "This operator reads an rdbtable via STDIN and writes an rdbtable\n"
        "via STDOUT. Non existent columns specified on the command line are\n"
        "silently ignored. If only an invalid column name is specified, then\n"
        "the whole input table is writen to STDOUT (with any duplicated columns\n"
        "removed). If all of the table columns are specified, then only the\n"
        "table comments, if any, are writen to STDOUT.\n"
        "\n"
        "Note: this operator is definitely faster than nsq-col, but slower\n"
        "than nsq-fcol. The only disadvantage of the latter is that excluding a\n"
        "column requires that all the other columns be listed explicitly on\n"
        "the command line, which may be impractical if they are numerous, and\n"
        "this is why %s has been provided.\n"
        "\n"
        "\n"
        "$Id: nsq-ncol.c,v 1.1 1998/05/29 20:43:01 carlos Exp $\n"
        "\n"
        "            ----------------------\n"
        "NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.\n"
        "This program comes with ABSOLUTELY NO WARRANTY; for details\n"
        "refer to the GNU General Public License.\n"
        "    \n"
        "You should have received a copy of the GNU General Public License\n"
        "along with this program;  if not, write to the Free Software\n"
        "Foundation, Inc., 59 Temple Place Suite 330, Boston, MA 02111-1307\n"
        "USA.\n"
        "            ----------------------\n",
        my_name, my_name, "\\", my_name);
}

/*
 * AWK names the interpreter handed to execlp().  This file, as seen, never
 * defines it, so it is presumably supplied by the build (e.g. -DAWK=...)
 * -- TODO confirm.  The fallback only applies when the build provides
 * nothing; execlp() then looks "awk" up through PATH.
 */
#ifndef AWK
#define AWK "awk"
#endif

/*
 * Copy src into dst in a form that can sit between double quotes in AWK
 * source text: backslash and double quote get a leading backslash, and a
 * newline becomes the two characters \n.
 *
 * Returns 0 on success, -1 if the result plus its terminating NUL does not
 * fit in size bytes (dst contents are then unspecified).
 */
static int awk_quote( char *dst, size_t size, const char *src )
{
  size_t used = 0;

  if( size == 0 )
    return -1;

  for( ; *src != '\0'; src++ ) {
    int special = ( *src == '\\' || *src == '"' || *src == '\n' );

    /* Always keep one byte free for the terminating NUL. */
    if( used + ( special ? 2u : 1u ) >= size )
      return -1;
    if( special )
      dst[used++] = '\\';
    dst[used++] = ( *src == '\n' ) ? 'n' : *src;
  }
  dst[used] = '\0';
  return 0;
}

/*
 * Render the AWK program into buf with snprintf() semantics: at most size
 * bytes are written and the return value is the length the complete
 * program needs, so calling it with (NULL, 0) just measures it.
 *
 * debug     : non-zero makes the program dump ARGC/ARGV on /dev/stderr.
 * no_hdr    : non-zero suppresses table comments, column names and column
 *             definitions.
 * c_names   : blank-separated names of the columns to drop.  It is pasted
 *             verbatim into an AWK string literal, exactly as before (the
 *             backslash-newline folding described in the help text
 *             presumably relies on that -- TODO confirm), so a double
 *             quote in it still breaks the generated program.
 * esc_chars : characters to backslash-escape in the table body, already
 *             processed by awk_quote().
 *
 * Differences from the previous program text:
 *  - the -e character set is actually handed to AWK (it used to be
 *    collected and then never used), and it is applied by a plain
 *    character scan rather than one regex per character, so characters
 *    such as '.' or '\' are matched literally;
 *  - the definition and body rules count emitted fields instead of testing
 *    whether out_rec is empty, so an empty first field keeps its separator
 *    and a row whose kept fields are all empty is still printed.
 */
static int format_awk_program( char *buf, size_t size, int debug, int no_hdr,
                               const char *c_names, const char *esc_chars )
{
  return snprintf( buf, size,
    "#\n"
    "#\n"
    "BEGIN { NULL = \"\"\n"
    "  FS = \"\\t\"; OFS = FS;\n"
    "  # Characters to be backslash-escaped in the table body.\n"
    "  esc_chars = \"%s\"\n"
    "  # Honour the 'debug' switch.\n"
    "  if( %d ) {\n"
    "    arg_vec = \"# ARGC = \" ARGC\n"
    "\n"
    "    for( arg in ARGV )\n"
    "      arg_vec = arg_vec \"\\n# ARGV[\" arg \"] = \" ARGV[arg]\n"
    "\n"
    "    print arg_vec > \"/dev/stderr\"\n"
    "  }\n"
    "}\n"
    "# Table comments.\n"
    "r == 0 && $0 ~ /^ *#/ {\n"
    "  if( ! %d ) print\n"
    "  next\n"
    "}\n"
    "# Column names.\n"
    "r == 0 {\n"
    "  # Get requested column names.\n"
    "  split( \"%s\", c_names, \" \" )\n"
    "  # Fill column name and position arrays.\n"
    "  while( ++p <= NF ) {\n"
    "    # Make sure we pick the first occurrence of duplicated column\n"
    "    # names (it may happen after a join).\n"
    "    if( P[$p] == NULL ) {\n"
    "      P[$p] = p\n"
    "      N[p] = $p\n"
    "      for( d in c_names )\n"
    "        if( $p == c_names[d] ) {\n"
    "          delete P[$p]\n"
    "          delete N[p]\n"
    "        }\n"
    "    }\n"
    "  }\n"
    "  if( ! %d ) {\n"
    "    while( ++c <= NF ) {\n"
    "      if( N[c] != NULL ) {\n"
    "        if( out_rec == NULL ) out_rec = $c\n"
    "        else out_rec = out_rec OFS $c\n"
    "      }\n"
    "    }\n"
    "    if( out_rec != NULL ) print out_rec\n"
    "  }\n"
    "  r++\n"
    "  next\n"
    "}\n"
    "# Column definitions.\n"
    "r == 1 {\n"
    "  c = 0\n"
    "  n_out = 0\n"
    "  out_rec = NULL\n"
    "  if( ! %d ) {\n"
    "    while( ++c <= NF )\n"
    "      if( N[c] != NULL ) {\n"
    "        # Count fields rather than testing out_rec, so that an\n"
    "        # empty leading field still gets its separator.\n"
    "        if( n_out++ ) out_rec = out_rec OFS $c\n"
    "        else out_rec = $c\n"
    "      }\n"
    "    if( n_out ) print out_rec\n"
    "  }\n"
    "  r++\n"
    "  NR = 0\n"
    "  next\n"
    "}\n"
    "# Table body.\n"
    "{\n"
    "  c = 0\n"
    "  n_out = 0\n"
    "  out_rec = NULL\n"
    "  while( ++c <= NF )\n"
    "    if( N[c] != NULL ) {\n"
    "      if( n_out++ ) out_rec = out_rec OFS $c\n"
    "      else out_rec = $c\n"
    "    }\n"
    "  if( n_out ) {\n"
    "    if( esc_chars != NULL ) out_rec = esc_out( out_rec )\n"
    "    print out_rec\n"
    "  }\n"
    "}\n"
    "# Prepend a backslash to every character of s found in esc_chars.\n"
    "function esc_out( s,    i, n, ch, res ) {\n"
    "  n = length( s )\n"
    "  res = \"\"\n"
    "  for( i = 1; i <= n; i++ ) {\n"
    "    ch = substr( s, i, 1 )\n"
    "    if( index( esc_chars, ch ) ) res = res \"\\\\\" ch\n"
    "    else res = res ch\n"
    "  }\n"
    "  return res\n"
    "}",
    esc_chars, debug, no_hdr, c_names, no_hdr, no_hdr );
}

/*
 * Entry point: parse the options, generate the AWK program that drops the
 * requested columns and replace this process with the AWK interpreter.
 *
 * Options (see show_help()): -h help, -n strip header, -x debug,
 * -e SET escape the characters of SET in the table body.
 * The first non-option argument is the blank-separated list of columns to
 * drop; further arguments are ignored.
 *
 * Exit status: 0 after -h; 1 on a usage error, when the program cannot be
 * built, or when the interpreter cannot be executed.  On success execlp()
 * does not return, so the status is that of the interpreter.
 */
int main( int  argc, char *argv[] ) {

  /* For getopt() */
  extern char *optarg;
  extern int optind;

  /* for the rest of the program. */
  char *my_name = argv[0];
  const char *c_names;
  /* Every character may need a backslash in front of it once quoted. */
  char esc_quoted[2 * MAX_ESCAPE_LENGTH] = EMPTY;
  char *cmd_buf;
  int opt;
  int need;
  int no_hdr = 0, debug = 0;

  /* POSIX getopt() signals the end of the options with -1. */
  while( (opt = getopt(argc, argv, "xnhe:")) != -1 ) {
    switch( opt ) {
      case 'h':
        show_help(my_name);
        exit(0);
      case 'n':
        no_hdr = 1;
        break;
      case 'x':
        debug = 1;
        break;
      case 'e':
        /* Refuse an oversized set instead of silently truncating it. */
        if( awk_quote(esc_quoted, sizeof esc_quoted, optarg) != 0 ) {
          fprintf(stderr, "%s: the escape set given with -e is too long\n",
                  my_name);
          exit(1);
        }
        break;
      default:
        show_help(my_name);
        exit(1);
    }
  }

  if( optind >= argc ) {
    show_help(my_name);     /* No columns were specified */
    exit(1);
  }

  /* Use the argument in place: no copy, hence no silent truncation. */
  c_names = argv[optind++];

  /* Measure first, then allocate exactly what the program needs. */
  need = format_awk_program(NULL, 0, debug, no_hdr, c_names, esc_quoted);
  if( need < 0 ) {
    fprintf(stderr, "%s: cannot build the AWK program\n", my_name);
    exit(1);
  }

  cmd_buf = malloc((size_t)need + 1);
  if( cmd_buf == NULL ) {
    perror(my_name);
    exit(1);
  }
  format_awk_program(cmd_buf, (size_t)need + 1, debug, no_hdr, c_names,
                     esc_quoted);

  if( debug )
    fprintf(stderr,
            "Generated AWK program:\n    ----------\n%s\n----------\n",
            cmd_buf);

  /* A variadic argument list must end with a null *pointer*. */
  execlp(AWK, "awk", cmd_buf, (char *)NULL);

  /* execlp() only returns when it failed: report it, do not claim success. */
  perror(AWK);
  exit(1);
}
