/* -------------------------------------------------------------------------
Stephan Huckemann Oct.15.01
hypergeometric cumulative probability for at least x successes in a sample of size n where 
population size is N with X successes in entire population

let a = max(0, n+X-N) and b = min(n,X), then

P(t <= x) = Sum(i= a ... [x]) of { binomial(X, i)* binomial(N-X, n-i) / binomial(N, n)}
	  = 1 - Sum(i = [x]+1 ... b) of { binomial(X, i)* binomial(N-X, n-i) / binomial(N, n)}
-------------------------------------------------------------------------*/

stats::hypergeometricCDF:=proc(N, X, n)

local fN, fX, fn;

option escape;
begin

/*prog::check(stats::hypergeometricCDF,6);*/

  if args(0)<>3 then
     error("expecting three arguments")
  end_if:

  // ------------- check N -------------
  fN := float(N);
  if domtype(N) = DOM_INT and N < 1 then
     error("the 'population size' must be symbolic or a positive integer"):
  end_if;
  if domtype(fN) = DOM_FLOAT and domtype(N) <> DOM_INT then
     error("the 'population size' must be symbolic or a positive integer"):
  end_if;
  if domtype(fN) = DOM_COMPLEX then
     error("the 'population size' must be real"):
  end_if;
  
 // ------------- check X -------------
  fX := float(X);
  if domtype(X) = DOM_INT then
	if X < 0 or (domtype(N)= DOM_INT and X > N) then
	     error("the 'success population size' must be symbolic or an integer".
		" between 0 and 'population size'"):
	end_if;
  end_if;
  if domtype(fX) = DOM_FLOAT and domtype(X) <> DOM_INT then
	     error("the 'success population size' must be symbolic or an integer".
		" between 0 and 'population size'"):  
  end_if;
  if domtype(fX) = DOM_COMPLEX then
     error("the 'success population size' must be real"):
  end_if;
 
 // ------------- check n -------------
  fn := float(n);
  if domtype(n) = DOM_INT then
	if n < 0 or (domtype(N) = DOM_INT and n > N) then
	     error("the 'sample size' must be symbolic or an integer".
		" between 0 and 'population size'"):
	end_if;
  end_if;
  if domtype(fn) = DOM_FLOAT and domtype(n) <> DOM_INT then
     error("the 'sample size' must be symbolic or an integer".
	" between 0 and 'population size'"):
  end_if;
  if domtype(fn) = DOM_COMPLEX then
     error("the 'sample size' must be real"):
  end_if;
 

  //-------------------------------
  // return the following procedure
  //-------------------------------
  proc(x)
  local NN, XX, nn, fNN, fXX, fnn, fx, a, b, tx, tmp, tmpsum, k;
  begin
    if args(0)<>1 then
       error("expecting one argument")
    end_if:

    // double check the parameters N, X and n. They
    // might have changed since this procedure was
    // created:


    // ------------- check N -------------
    NN  := context(N):
    fNN := float(NN):
    if domtype(NN) = DOM_INT and NN < 1 then
     error("the 'population size' must be symbolic or a positive integer"):
    end_if;
    if domtype(fNN) = DOM_FLOAT and domtype(NN) <> DOM_INT then
     error("the 'population size' must be symbolic or a positive integer"):
    end_if;
    if domtype(fNN) = DOM_COMPLEX then
     error("the 'population size' must be real"):
    end_if;

    // ------------- check X -------------
    XX := context(X):
    fXX := float(XX):
    if domtype(XX) = DOM_INT then
	if XX < 0 or (domtype(NN)= DOM_INT and XX > NN) then
	     error("the 'success population size' must be symbolic or an integer".
		" between 0 and 'population size'"):
	end_if;
    end_if;
    if domtype(fXX) = DOM_FLOAT and domtype(XX) <> DOM_INT then
     error("the 'success population size' must be symbolic or an integer".
	" between 0 and 'population size'"):
    end_if;
    if domtype(fXX) = DOM_COMPLEX then
     error("the 'success population size' must be real"):
    end_if;

    // ------------- check n -------------
    nn:= context(n):
    fnn := float(nn):
    if domtype(nn) = DOM_INT then
	if nn < 0 or (domtype(NN) = DOM_INT and nn > NN) then
	     error("the 'sample size' must be symbolic or an integer".
		" between 0 and 'population size'"):
	end_if
    end_if;
    if domtype(fnn) = DOM_FLOAT and domtype(nn) <> DOM_INT then
     error("the 'sample size' must be symbolic or an integer".
	" between 0 and 'population size'"):
    end_if;
    if domtype(fnn) = DOM_COMPLEX then
     error("the 'sample size' must be real"):
    end_if;

    fx := float(x);
    // -------- If x is neither symbolic nor integer nor float-----------------------
    if domtype(fx) = DOM_COMPLEX	then 
	error("the argument must be real"); 
    end_if;

    // now we can assume that N, X and n are of type integer or symbolic,
    // x is integer, float or symbolic

    if x = -infinity then return(0); end_if;
    if x =  infinity then return(1); end_if;


    //------------ trivial cases -----------------
    // if x < 0, then P(x) = 0
    // if x >= N, n, X, then P(x) = 1.

    // first the numeric case:
    if domtype(fx) = DOM_FLOAT then
	if fx < 0 then 
		if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
		return(0); 
	end_if;
	if (domtype(NN) = DOM_INT and (fx - NN) >= 0) then 
		if domtype(x) = DOM_FLOAT then return(float(1)) end_if;
		return(1); 
	end_if;
    	if (domtype(XX) = DOM_INT and (fx - XX) >= 0) then 
		if domtype(x) = DOM_FLOAT then return(float(1)) end_if;
		return(1); 
	end_if;
    	if (domtype(nn) = DOM_INT and (fx - nn) >= 0) then 
		if domtype(x) = DOM_FLOAT then return(float(1)) end_if;
		return(1); 
	end_if;

    // if n numeric = 0 => P(x) = 1 if x >= 0, P(x) = 0 else 
    // (that's already covered above)

    else  // x is symbolic and cannot be convertet to float
	if (domtype(NN) <> DOM_INT and is((x - NN) >= 0) = TRUE) then 
		return(1); 
	end_if;
    	if (domtype(XX) <> DOM_INT and  is((x - XX) >= 0) = TRUE) then 
		return(1); 
	end_if;
    	if (domtype(nn) <> DOM_INT and  is((x - nn) >= 0) = TRUE) then 
		return(1); 
	end_if;

    end_if;

   // ---------------- now we know that x is symbolic or >= 0 ---------

    // --------------- pretty trivial case -----------------------
    // if n + X > N and x < n + X - N then P(x) = 0
    // check if n + X - N is numeric
     if	domtype(nn + XX - NN) = DOM_INT and domtype(fx) = DOM_FLOAT
	and (nn + XX - NN) > 0 and (nn + XX - NN - fx) > 0 then
		if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
		return(0);
	// note that actually (nn + XX - NN - x) > 0 implies that  (nn + XX - NN) > 0
     end_if;

    // For reasons of efficiency the symbolic case is continued at the end

    // -------------- check X = N --------------- 
    // P(x) = 1 <=> x >= n (is already done above)
    // P(x) = 0 <=> x <  n 

    if iszero(NN - XX) then
	if domtype(fx) = DOM_FLOAT and domtype(nn) = DOM_INT then
		if (fx - nn) < 0 then
			if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
			return(0);
		end_if;
    	else  // x is symbolic and cannot be convertet to float or
	      // nn is symbolic, or both, try to compare them
		if ( domtype(fx) <> DOM_FLOAT and domtype(nn) =  DOM_INT) or
		   ( domtype(fx) =  DOM_FLOAT and domtype(nn) <> DOM_INT) or
		   ( domtype(fx) <>  DOM_FLOAT and domtype(nn) <> DOM_INT) then
			if (is(x - nn < 0) = TRUE) then
				if domtype(x - nn) = DOM_FLOAT then
					return(float(0));
				end_if;
				return(0);
			end_if;
			// else nothing can be said
      			return(hold(stats::hypergeometricCDF)(NN, XX, nn)(x));
		end_if;
    	end_if; 
    end_if;   

    // -------------- check X = 0 --------------- 
    // P(x) = 1 <=> x >= 0 is already done above
    // P(x) = 0 <=> x <  0 is already done above

    // -------------- check X = 1 --------------
    // P(x) = 1 	if 1 >= x 	is already covered above
    // P(x) = (N - n)/N if 0 <= x < 1
    // P(x) = 0 <=> x <  0 is already done above

    if iszero(1 - XX)then
	if domtype(fx) = DOM_FLOAT then
		if fx < 1 and fx >= 0 then
			if domtype(x) = DOM_FLOAT then return(float((NN-nn)/NN)) end_if;
			return((NN-nn)/NN);
		end_if;
	else // x is symbolic, nothing can be said
      		return(hold(stats::hypergeometricCDF)(NN, XX, nn)(x));
	end_if;
    end_if;

    // -------------- check X = N - 1 ---------------
    // P(x) = 0 	if x < n - 1 is already covered above since n - 1 = n + X - N
    // P(x) = n / N     if n-1 <= x < n
    // P(x) = 1		if x >= n	is already done above

    if iszero(NN - 1 - XX)then
	if domtype(fx) = DOM_FLOAT and domtype(nn) = DOM_INT then
		if (fx - nn) < 0 and (fx - nn + 1) >= 0 then
			if domtype(x) = DOM_FLOAT then return(float(nn/NN)) end_if;
			return(nn/NN);
		end_if;
   	else  // x is symbolic and cannot be convertet to float or
	      // nn is symbolic, or both
		if ( domtype(fx) <> DOM_FLOAT and domtype(nn) =  DOM_INT) or
		   ( domtype(fx) =  DOM_FLOAT and domtype(nn) <> DOM_INT) or
		   ( domtype(fx) <>  DOM_FLOAT and domtype(nn) <> DOM_INT)then			
			if (is(x - nn < 0) = TRUE) and (is((nn - 1) - x <= 0) = TRUE) then
				if domtype(x - nn) = DOM_FLOAT then return(float(nn/NN)) 
				end_if;
				return(nn/NN);
			end_if;
			// else nothing can be said
      			return(hold(stats::hypergeometricCDF)(NN, XX, nn)(x));
		end_if;
    	end_if; 
    end_if;   


	// ---------------------- now n ---------------------------
    // -------------- check n = N --------------- 
    // P(x) = 1 <=> x >= X (is already done above)
    // P(x) = 0 <=> x <  X 

    if iszero(NN - nn) then
	if domtype(fx) = DOM_FLOAT and domtype(XX) = DOM_INT then
		if (fx - XX) < 0 then
			if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
			return(0);
		end_if;
    	else  // x is symbolic and cannot be convertet to float or
	      // XX is symbolic, or both, try to compare them
		if ( domtype(fx) <> DOM_FLOAT and domtype(XX) =  DOM_INT) or
		   ( domtype(fx) =  DOM_FLOAT and domtype(XX) <> DOM_INT) or
		   ( domtype(fx) <>  DOM_FLOAT and domtype(XX) <> DOM_INT) then
			if (is(x - XX < 0) = TRUE) then
				if domtype(x - XX) = DOM_FLOAT then
					return(float(0));
				end_if;
				return(0);
			end_if;
			// else nothing can be said
      			return(hold(stats::hypergeometricCDF)(NN, XX, nn)(x));
		end_if;
    	end_if; 
    end_if;   

    // -------------- check n = 0 --------------- 
    // P(x) = 1 <=> x >= 0 is already done above
    // P(x) = 0 <=> x <  0 is already done above

    // -------------- check n = 1 --------------
    // P(x) = 1 	if 1 >= n 	is already covered above
    // P(x) = (N - X)/N if 0 <= x < 1
    // P(x) = 0 <=> x <  0 is already done above

    if iszero(1 - nn)then
	if domtype(fx) = DOM_FLOAT then
		if fx < 1 and fx >= 0 then
			if domtype(x) = DOM_FLOAT then return(float((NN-XX)/NN)) end_if;
			return((NN-XX)/NN);
		end_if;
	else // x is symbolic, nothing can be said
      		return(hold(stats::hypergeometricCDF)(NN, XX, nn)(x));
	end_if;
    end_if;

    // -------------- check n = N - 1 ---------------
    // P(x) = 0 	if x < X - 1 is already covered above since X - 1 = n + X - N
    // P(x) = X / N     if X-1 <= x < X
    // P(x) = 1		if x >= X	is already done above

    if iszero(NN - 1 - nn)then
	if domtype(fx) = DOM_FLOAT and domtype(XX) = DOM_INT then
		if (fx - XX) < 0 and (fx - XX + 1) >= 0 then
			if domtype(x) = DOM_FLOAT then return(float(XX/NN)) end_if;
			return(XX/NN);
		end_if;
   	else  // x is symbolic and cannot be convertet to float or
	      // nn is symbolic, or both
		if ( domtype(fx) <> DOM_FLOAT and domtype(XX) =  DOM_INT) or
		   ( domtype(fx) =  DOM_FLOAT and domtype(XX) <> DOM_INT) or
		   ( domtype(fx) <>  DOM_FLOAT and domtype(XX) <> DOM_INT)then			
			if (is(x - XX < 0) = TRUE) and (is((XX - 1) - x <= 0) = TRUE) then
				if domtype(x - XX) = DOM_FLOAT then return(float(XX/NN)) 
				end_if;
				return(XX/NN);
			end_if;
			// else nothing can be said
      			return(hold(stats::hypergeometricCDF)(NN, XX, nn)(x));
		end_if;
    	end_if; 
    end_if;   

    // -------in case any is symbolic-----------------------------

// in order to display a result we would have to know the number of addends, 
// i.e. knowledge of x and at least one of a or b which means knowledge of at 
// least n and X, then we could explicitely 
// expand the sum backwards like while knowing all but N
// if we get past a we must rely on binomial(N-X, n-t) = 0 for t < x
// since binomial(0, 0) = 1 we have to exclude N = X 
// and assume N > X

    // we shall do this case first:

    if domtype(NN) <> DOM_INT and domtype(XX) = DOM_INT and domtype(nn) = DOM_INT 
	and domtype(fx) = DOM_FLOAT  then
	if is((NN-XX) > 0 ) = TRUE then
	    b := min(nn, XX); 
	    tx := trunc(x);
	    // if x is float everything goes float
            // we do however not use floating point arithmetic here
            // internally because binomial(N,1.0) <> N 
	    if domtype(x) = DOM_FLOAT then
		tmp := float(1);
    	    else
		tmp := 1;
	    end_if;
	    // tx < b
	    tmp := tmp * binomial(XX, tx + 1) ;
	    tmpsum := tmp * binomial(NN - XX, nn - tx - 1);   
	    if b - tx - 1 > 0 then
		    for k from tx + 2 to b do
	// binomial(X, k) = binomial(X, k - 1) * (X - k + 1) / k
			tmp := tmp * (XX - k + 1) / k;
        		tmpsum := tmpsum + tmp * binomial(NN - XX, nn - k);
		    end_for;
	    end_if;
	    if domtype(x) = DOM_FLOAT then
		    return(float(1 - tmpsum / binomial(NN, nn))); // P(t <= x) = 1 - P(t > x)
	    else
	    	return(1 - tmpsum / binomial(NN, nn)); // P(t <= x) = 1 - P(t > x)
	    end_if;
	end_if;
    end_if;

    // now any other case where at least one is symbolic

    if domtype(NN) <> DOM_INT or domtype(XX) <> DOM_INT or domtype(nn) <> DOM_INT 
	or domtype(fx) <> DOM_FLOAT  then

    // --------------- pretty trivial case -----------------------
    // if n + X > N and x < n + X - N then P(x) = 0
    // what was left was that case in which at least one of the N,X,n was symbolic

     if
	is((nn + XX - NN) > 0) = TRUE and 
	is((nn + XX - NN - x) > 0) = TRUE then
		if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
		return(0);
     end_if;
    // nothing can be said
       return(hold(stats::hypergeometricCDF)(NN, XX, nn)(x));
    end_if;

    // ----------------------------------------
    // now everything is completly numeric
    // N,X and n are positive integers
    // trunc(x) produces a non-negative integer 
    //		less than min(n, X) and more than or equal to max(0, n + X - N) 
    // ----------------------------------------

    // is trunc(x) closer to max(0, n + X - N) or to min(n, X)? Depending on that
    // two different summations further down

    a := max(0, nn + XX - NN);
    b := min(nn, XX); 
    tx := trunc(x);

    // if x is float convert everything to float for speeding up

    if domtype(x) = DOM_FLOAT then
	tmp := float(1);
    else
	tmp := 1;
    end_if;

    if tx - (a + b)/ 2 < 0 then // less addends from below
	    tmp := tmp * binomial(XX, a) * binomial(NN - XX, nn - a) ;   // = P(t <= a)
	    tmpsum := tmp;    
	    for k from a+1 to tx do
	// binomial(X, k) = binomial(X, k - 1) * (X - k + 1) / k
        // binomial(N - X, n - k) = binomial(N - X, n - (k - 1)) * (n - k + 1) / (N - X - n  + k)
		tmp := tmp * (XX - k + 1) / k * (nn - k + 1) / (NN - XX - nn + k);
        	tmpsum := tmpsum + tmp;
	    end_for;
	    return(tmpsum / binomial(NN, nn));    // = P(t <= x)
	else  // tx < b!
	    tmp := tmp * binomial(XX, tx + 1) * binomial(NN - XX, nn - tx - 1) ; // = P(t = tx + 1)
	    tmpsum := tmp;   
	    if b - tx - 1 > 0 then
		    for k from tx + 2 to b do
	// binomial(X, k) = binomial(X, k - 1) * (X - k + 1) / k
        // binomial(N - X, n - k) = binomial(N - X, n - (k - 1)) * (n - k + 1) / (N - X - n  + k)
			tmp := tmp * (XX - k + 1) / k * (nn - k + 1) / (NN - XX - nn + k);
        		tmpsum := tmpsum + tmp;
		    end_for;
	    end_if;
	    return(1 - tmpsum / binomial(NN, nn));      // P(t <= x) = 1 - P(t > x)
	end_if;
  end_proc:
end_proc:
