//       

/*--
Calls:    getdata(test_args, opt, nc, l) / getdata(test_args, opt, nc, s, c)

internal procedure, required by procedures like mean, median,...
to get and / or test their arguments / elements of arguments.
if test_args is TRUE, then all arguments are tested, if test_args is
FALSE, no arguments are tested.
if a data-column of a sample is specified, the column index is verified
if no data-column of a sample is specified, then all data-columns are
selected.
option "anything":
                allows arbitrary MuPAD objects
option "all_data":
                allows DOM_INT, DOM_FLOAT, DOM_RAT, DOM_EXPR, DOM_IDENT, DOM_COMPLEX
option "numeric_only":
                allows DOM_INT, DOM_FLOAT, DOM_RAT, DOM_COMPLEX,
                converts expressions to floats
if called by a procedure or function, all errors are returned as
strings. if no errors are found, a sequence of data-columns is returned.


Parameters:
  test_args - TRUE or FALSE
  opt       - option, "anything" or "all_data" or "numeric_only"
  nc        - number of allowed data-columns
  l         - list of rows or sequence of columns
  s         - stats::sample
  c         - (list of) data-column index/indices (optional)

In detail. Possible calls are:

1) stats::getdata(TRUE/FALSE, "anything"/"all_data"/"numeric_only", nc, c1, c2, .., c.nc)
       nc          - integer > 0
       c1, c2, ... - columns. A column c is a list
                    c = [x1, x2, ..]  of data x.i.
                    All columns must have the same length
       Returns:     c1, c2, .., c.nc

2) stats::getdata(TRUE/FALSE, "anything"/"all_data"/"numeric_only", 1, x1, x2, ..)
       x1, x2, ... - data (e.g., numbers)
       Returns: [x1, x2, ..]  // to be interpreted as 1 column

   Case 2 is equivalent to case 1 with nc = 1, i.e., 
      stats::getdata(TRUE/FALSE, "anything"/"all_data"/"numeric_only", 1, x1, x2, ..)
    = stats::getdata(TRUE/FALSE, "anything"/"all_data"/"numeric_only", 1, c1)
   with c1 = [x1, x2, ...]

3) stats::getdata(TRUE/FALSE, "anything"/"all_data"/"numeric_only", nc, s) 
       nc - integer > 0
       s  - a stats::sample with exactly nc columns
       Returns: c1, c2, ..  where c.i are the columns of s (a column is a list)

4) stats::getdata(TRUE/FALSE, "anything"/"all_data"/"numeric_only", nc, s, ci1, ci2, .., ci.nc) 
       nc              - integer > 0
       s               - a stats::sample with exactly at least nc columns
       ci1, ..., ci.nc - column indices
       Returns: c.ci1, .. , ci.nc,  where c.j are the columns of s (a column is a list)

5) stats::getdata(TRUE/FALSE, "anything"/"all_data"/"numeric_only", nc, s, [ci1, ci2, .., ci.nc]) 
       nc              - integer > 0
       s               - a stats::sample with exactly at least nc columns
       ci1, ..., ci.nc - column indices
       Returns: c.ci1, .. , ci.nc,  where c.j are the columns of s (a column is a list)


In all cases: stats::getdata(TRUE/FALSE, "anything"/"all_data"/"numeric_only, nc, whatever)
              returns a sequence of exactly nc lists.
              Each list should be regarded as a column of a sample.

Note: nc is redundant information (the same functionality could
      be implemented without this parameter)

--*/

stats::getdata := proc(test_args : DOM_BOOL, opt, nc, li, co)
  local l, c, err, err_msg, floatconvert;
begin
  if args(0)>=5 then
	c:=co
  end_if; 
  if test_args then // return errors as errors 
    err:=error;
    if args(0) < 4 then error("Wrong number of arguments") end_if;
    if (opt <> "all_data" and 
        opt <> "numeric_only" and
        opt <> "anything") or
        domtype(nc) <> DOM_INT
        or nc < 1 then 
      error("Invalid arguments")
    end_if
  else // return errors as strings (if getdata is called by procedure) 
    err:=return
  end_if;
 
  err_msg:=FAIL; // to avoid warnings 
  l:=li;
  if domtype(l) = DOM_LIST then 
    if domtype(op(l, 1)) = DOM_LIST then                   // list of rows 
      if test_args then
        if args(0) <> 4 then
          err("Wrong number of arguments")
        elif has(map(l, proc() begin (if nops(args(1)) <> nops(op(l, 1)) then 
                               FAIL 
                             end_if) end_proc ), FAIL)
        then
          err("Rows are not of equal length")
        end_if
      end_if;
      l:=stats::unzipCol(l);
      if test_args then
        map(l, proc() begin (stats::testdata(opt, args(1));
                    if last(1) <> TRUE then err_msg:=last(1) end_if) end_proc );
        if domtype(err_msg) = DOM_STRING then err(err_msg) end_if
      end_if

    elif nc > 1 then                       // sequence of columns (nc > 1) 
      l:= args(4..args(0));
      if test_args then
        if l = [] then err("Column empty") end_if;
        if has(map([l], proc() begin (if nops(args(1)) <> nops(op([l], 1)) then 
                               FAIL 
                             end_if) end_proc ), FAIL)
        then
          err("Columns are not of equal length")
        end_if;
        map(l, proc() begin (stats::testdata(opt, args(1));
                    if last(1) <> TRUE then err_msg:=last(1) end_if) end_proc );
        if domtype(err_msg) = DOM_STRING then err(err_msg) end_if
      end_if

    else                                                       // 1 column 
      if test_args then
        stats::testdata(opt, args(4..args(0)));
        if last(1) <> TRUE then err(last(1)) end_if
      end_if
    end_if;

  elif domtype(l) = stats::sample then                           // sample 
    if args(0) = 4 then      // no column indices specified -> all columns 
      c:=[$ 1..nops(op(l, 1))];
      if opt <> "anything" then
        c:=map(c, proc() begin (if domtype(op(l, [1, args(1)])) <> DOM_STRING then
                       args(1)
                     else            // select data-columns from all columns 
                       null()
                     end_if) end_proc );
      end_if;
    elif args(0) > 5 then // sequence of indices 
      c:=[args(5..args(0))]
    elif domtype(c) <> DOM_LIST then // single index 
      c:=[c]
    end_if;
    if nops(c) <> nc then
        err("mismatch in number of columns. Weights provided?");
    end_if;
    // test combinations of l with all elements of c 
    map(c, proc() begin (stats::testdata(opt, l, args(1));
                  if last(1) <> TRUE then err_msg:=last(1) end_if) end_proc );
    if domtype(err_msg) = DOM_STRING then err(err_msg) end_if;
    l:=op(map(c, proc() begin (stats::sample)::col2list(l, args(1)) end_proc ))
                                                        // get all columns 
  else                                              // sequence of numbers 
    l:=[args(4..args(0))];
    if test_args then
      stats::testdata(opt, l);
      if last(1) <> TRUE then err(last(1)) end_if
    end_if
  end_if;

  // If weights are provided, then the requested number of columns nc
  // should be smaller than the actual number of columns extracted
  // from the data. The following error makes stats::reg try again to
  // get the data. The next try includes weights.

  if nops([l]) <> nc then
    err("Wrong number of data-columns")
  end_if;


  //--------------------------------------------------------------
  // convert expressions to floats, if "numeric_only" is specified:
  // Stupid: this conversion is also done inside stats::testdata,
  // so we do it twice. Should be optimized!

  floatconvert:= proc(x) begin
      if domtype(x) = DOM_EXPR or contains(Type::ConstantIdents, x)
      then float(x)
      else x;
      end_if;
  end_proc;

  if opt = "numeric_only" then
     if domtype(l[1]) = DOM_LIST then
          l:= map(l, map, floatconvert);
     else l:= map(l, floatconvert);
     end_if;
  end_if;
  //--------------------------------------------------------------

  return(l);
end_proc:

// end of file 
 
