/*---------------------------------------------------------------
Info:  The probability function of a finite data sample

Call(s):
       stats::finitePF([x1, x2, ..], [p1, p2, ..])
       stats::finitePF([[x1, p1], [x2, p2], ..])
       stats::finitePF(s <, c1, c2>)

Parameters:
       x1, x2, .. -- arbitrary MuPAD objects. 
       p1, p2, .. -- probability values: symbols or positive
                     numerical values. In case of numerical values,
                     they  must add up to 1
       s          -- a sample of type stats::sample
       c1, c2     -- a column index: a positive integer.
                     Column c1 of s provides the data x1, x2, ..
                     Column c2 of s provides the data p1, p2, ..
                     There is no need to pass a column index,
                     if s has only two columns

Returns: a procedure

Details:
   -  The function
      f:= stats::finitePF([x1,x2,..],[p1, p2,..])
      can be called in the form f(x), where x is
      an arbitrary MuPAD object or a set or a list
      of such objects. 

    - If x = x.i for some i, then p.i is returned. 
      Otherwise, 0 is returned.

      If x is a set, then the sum of all p-values
      corresponding to the intersection of the sets 
      x and {x1, x2, ..} is returned.

      If x is a list, it is treated like a set
      (i.e., duplicates in x are removed).

    - Note that x1 = 2*a and x2 = 2.0*a are treated
      like different data!

Examples:
   > f:= stats::finitePF([3, 2, PI, -1, 5], [p3, 0.2, 0.3, 0.3, p5]):
   > f(-2), f(1.3), f(3), f(PI), f(3.2), f(x), f(5), f(infinity)

                   0, 0, p3, 0.3, 0, 0, p5, 0

   > f({-2, 3, 2, PI})

                       p3 + 0.5

   > f([-2, 3, 3, 2, float(PI), PI])

                       p3 + 0.5
---------------------------------------------------------------*/
stats::finitePF:=proc(data)
local n, p, symbolicData, symbolicProb, cdf, eliminateDuplicates;
option escape;
begin
  if args(0) < 1 then
     error("expecting at least one argument")
  end_if:

  //----------------------------------------------------
  data:= stats::getdata(testargs(), "anything", 2, args(1..args(0))):
  if domtype(data) = DOM_STRING then
       error(data)
  end_if:

  if testargs() then
     if has([data] , []) then
        error("empty sample"):
     end_if:
  end_if:

  //----------------------------------------------------
  // Now, data = [x1, x2, ..], [p1, p2, ..];
  //----------------------------------------------------

  // split data into names and probabilities:
  [data, p]:= [data];
  assert(domtype(data) = DOM_LIST):
  assert(nops(data) = nops(p)):


  // There may be duplicates in the data. Eliminate them:
  eliminateDuplicates:= proc(data, p)
  local m, n, i, j; 
  begin
     n:= nops(data):
     m:= nops({op(data)}):
     if n = m then
        return([data, p]):
     else // warning("there seem to be duplicate data entries"):
        for j from n downto 1 do
        // find the *first* occurrence of data[j] in data
        i:= contains(data, data[j]);
        if i = j then 
           next; // there is no duplicate of data[j]
        end_if;
        // i <> j: there are duplicates: data[i] = data[j]
        // delete data[j] and add p[j] to p[i]:
        p[i]:= p[i] + p[j];
        delete data[j]; // eliminate duplicate
        delete p[j];    // eliminate duplicate
        n:= n - 1; // current length of data and p
        if n = m then
           // we can be sure there are no more duplicates
           break;
        end_if;
      end_for;
    end_if;
    [data, p];
  end_proc:

  // Do the elimination of the duplicates
  [data, p]:= eliminateDuplicates(data, p);
  assert(nops(data) = nops(p));

  n:= nops(data):

  /* The following does not work: we wish to allow strings in the data! */
  // symbolicData:= bool(indets(data) minus Type::ConstantIdents <> {}):
  symbolicData:= bool(map({op(data)}, domtype@float) <> {DOM_FLOAT});
  symbolicProb:= bool(indets(p) minus Type::ConstantIdents <> {}):

  if not symbolicProb then
     cdf:= _plus(op(p)):
     if domtype(float(cdf)) = DOM_FLOAT and
        specfunc::abs(float(cdf - 1)) > 10^(-DIGITS) then
        error("the probabilities do not add up to 1");
     end_if;
  end_if;

  //-------------------------------
  // return the following procedure
  //-------------------------------
  proc(x)
  local _data, _p, cdf, getProb, xx;
  begin
    if args(0)<>1 then
       error("expecting one argument")
    end_if:

    if symbolicData then
       // some of the symbolic values may have an up-date:
       _data:= context(data):
    else
       _data:= data;
    end_if;

    if symbolicProb then
       _p:= context(p):
       //--------------------------------------------------
       // doublecheck consistency of the updated probability values
       //--------------------------------------------------
       if indets(_p) = {} then
         cdf:= _plus(op(_p)):
         if domtype(float(cdf)) = DOM_FLOAT and
            specfunc::abs(float(cdf - 1)) > 10^(-DIGITS) then
            error("the probabilities do not add up to 1");
         end_if;
       end_if;
    else
       _p:= p:
    end_if;

    if symbolicData then
       [_data, _p]:= eliminateDuplicates(_data, _p);
    end_if;

    getProb:= proc(x)
     local i;
    begin
       i:= contains(_data, x):
       if i = 0 then
          return(0):
       else
          return(_p[i]);
       end_if;
    end_proc;

    if type(x) = DOM_LIST then
       x:= {op(x)}; // remove duplicates
    end_if;
    if type(x) = DOM_SET then
         return(_plus(getProb(xx) $ xx in x));
    else return(getProb(x));
    end_if;
  end_proc;
end_proc:
