//       

/*--
usage:
tabulate(s)                               input format 1
tabulate(s, c, cal)                       input format 2
tabulate(s, [c1, cal1], [c2, cal2], ...)  input format 3

gathers all rows with identical elements in those columns NOT
specified. the elements of the specified columns are thereby gathered
using the function/procedure cal.
input format 1:
   no data-columns are specified, thus equal rows are just counted
   (the number of equal rows is stored in an additional column)

input format 2:
   data-columns and (optional) one function are specified
   (by default the function is '_plus')

input format 3:
   [data-column index, function]-pairs are specified

WARNING: compatibility of function with number of specified columns
is NOT TESTED (yet) !
Functions with more than one data-column must accept a SEQUENCE OF
COLUMNS !!

For the output sample columns are arranged according to their order
given by
 1) their position in the input sample, if the output-column results
    from ONE column only (e.g. function '_plus', or unspecified
    columns)
 2) the first specified column for that function
    (e.g. in '...,[6, 3, cal],...' it would be '6')
--*/
/*--
parameters:
s   stats::sample
c   data-column index ((sequence of) integer(s)),
    range-indices allowed in input format 2
cal function/procedure,
    optional in input format 2, default: '_plus'
--*/


stats::tabulate := proc(sa)
  local argtype, c, cal, ccal, ccal2, fake, mc, n, i, j, s, ss, sss, m, k; 
begin
  if testargs() then
    if args(0) = 0 then
      error("Wrong number of arguments")
    elif domtype(sa) <> stats::sample then
      error("First argument: expecting a stats::sample")
    end_if
  end_if;

  s:=sa;
  s:=(stats::sample)::convert_to(s, DOM_LIST); // sample to list 

  // distinguish the 3 possible input-formats by their last argument       
  argtype:=type(args(args(0)));
  case argtype

  // only one argument given, thus counting of equal rows                  
  of stats::sample do
    if testargs() then
      if args(0) > 1 then
        error("Invalid last argument")
      end_if
    end_if;
    s:=map(s, proc() begin append(args(1), 1) end_proc ); // append column containing only 1s
    c:=[nops(s[1])];
    ccal:=[[nops(s[1]), _plus]];    // _plus on last column, thus counting 
                                    // equal rows                          
    break;

  // columns (and one function) given, this function will be used on       
  // each specified column; default function is _plus                      
  of DOM_INT do
  of "_range" do
  of DOM_FUNC_ENV do
  of DOM_PROC do
  of "_fconcat" do
    if argtype = DOM_INT or argtype = "_range" then
      cal:=_plus; // only columns given, thus using _plus as default 
      c:=[args(2..args(0))]
    else
      cal:=args(args(0));
      c:=[args(2..args(0)-1)]
    end_if;
    c:=map(c, proc() begin (if type(args(1)) = "_range" then
                    $ args(1); // transform ranges to sequences 
                  else
                    args(1)
                  end_if) end_proc );

    if testargs() then // testing of indices 
      if has(map(c, proc() begin bool(testtype(args(1), DOM_INT) 
                             and 0 < args(1) and args(1) <= nops(op(s, 1))
                             ) end_proc ), FALSE)
         or has(map(c, proc() begin not testtype(op(s, [1, args(1)]), DOM_STRING)
                              end_proc ), FALSE)
             // if at least one element of c (column index) not of DOM_INT 
             // or < 1 or > number of columns or column contains a string  
             // the latter must be tested separately.                      
      then   
        error("Invalid data-column index") 
      end_if
    end_if;

    ccal:=map(c, proc() begin [args(), cal] end_proc );
    break;

  // (sequence of) list(s) containing columns and corresponding            
  // functions; this is the most general input format                      
  of DOM_LIST do
    ccal:=[args(2..args(0))];
    c:=map(ccal, proc() begin op(args(), 1..(nops(args())-1)) end_proc );
    cal:=map(ccal, proc() begin op(args(), nops(args())) end_proc );

    if testargs() then // testing of indices 
      if has(map(c, proc() begin bool(testtype(args(1), DOM_INT) 
                             and 0 < args(1) and args(1) <= nops(op(s, 1))
                             ) end_proc ), FALSE)
         or has(map(c, proc() begin not testtype(op(s, [1, args(1)]), DOM_STRING)
                              end_proc ), FALSE)
             // if at least one element of c (column index) not of DOM_INT 
             // or < 1 or > number of columns or column contains a string  
             // the latter must be tested separately.                      
      then   
        error("Invalid data-column index") 
      end_if;
      // testing of functions 
      if has(map(cal, proc() begin bool(testtype(args(), DOM_FUNC_ENV) 
                             or testtype(args(), DOM_PROC)
                             or testtype(args(), "_fconcat")
                             ) end_proc ), FALSE) then   
        error("Invalid argument or missing function/procedure") 
      end_if;
      // less than 2 elements in [column, function]-tupel? 
      if has(map(ccal, proc() begin bool(nops(args()) > 1
                             ) end_proc ), FALSE) then   
        error("Missing column index")
      end_if;

      // still to come : 
      // testing if number of columns compatible with function 
      /*if has(map(ccal, fun(iszero(
        traperror(op(args(), nops(args()))(
                  (if nops(args()) = 2 then op([1,3,4,5])
                   elif nops(args()) > 2 then
                     op(([]; for k from 1 to nops(args())-1 do
                                        append(last(1), [1+k,5,3+k+k])
                                      end_for))
                   end_if) ))
                                   ))), FALSE) then   
        error("Columns incompatible with function/procedure (Wrong number of columns?)")
      end_if*/ //still to come

    end_if;
    break;

  otherwise
    if testargs() then error("Invalid input format") end_if

  end_case;

  fake:=proc() begin op(args(), 1) end_proc ; // guess! ;-) 
  // construct list mc of those column indices not in c 
  mc:=select([n $ n = 1..nops(s[1])], proc() begin _not(has(c, args(1))) end_proc );
  // prepare ccal 
  ccal:=map(mc, proc() begin [args(), fake] end_proc ).ccal;
  //ccal:=sort(ccal, fun(op(args(1), 1) < op(args(2), 1)));
  ccal2:=[];
  for j from 1 to nops(s[1]) do
    for i from 1 to nops(ccal) do
      if op(op(ccal, i), 1) = j then
        ccal2:=append(ccal2, op(ccal, i))
      end_if
    end_for
  end_for;
  ccal:=ccal2;

  // actual procedure 
  ss:=[];
  while _not(s = []) do  // while not all rows of s collected 
    m:=[];
    for n in mc do
      m:=append(m, s[1][n]) // m = all elements of first row of s wich are 
    end_for;                //                             specified in mc 
    // sss = list of all rows of s containing m 
    sss:=select(s, proc() local n; begin []; // = stats::selectRow(s, mc, m)       
                          for n in mc do 
                            append(last(1), op(args(1), n))
                          end_for;
                          bool(last(1) = m) end_proc );
    // gather rows of sss using specified function 
    map(ccal, proc() local l; begin (op(args(), nops(args()))(   // specified function on following arguments:     
       (if nops(args()) = 2 then               // only one data-column, then sequence of entries 
          op((k:=op(args(), 1); map(sss, proc() begin args()[k] end_proc )))
        else                                   // n data-columns, then sequence of n columns     
          op(([]; for l from 1 to nops(args())-1 do
                               k:=op(args(), l);
                               append(last(2), map(sss, proc() begin args()[k] end_proc )) 
                             end_for))
          /*;stats::zipCol(last(1))*/ // without this, a sequence of columns is 
                             // returned otherwise a list of rows          
        end_if))) end_proc );
    // append last result (gathered sss) to ss and then remove all rows    
    // from s containing m                                                 
    ss:=append(ss, last(1));
    s:=select(s, proc() local n; begin ([]; // = stats::selectRow(s, mc, m, _not)   
                          for n in mc do 
                            append(last(1), op(args(1), n))
                          end_for;
                          _not(bool(last(1) = m))) end_proc )
  end_while;
  new(stats::sample, ss)
end_proc:


// end of file 
 
