/*-------------------------------------------------------------------

faclib::knapsack -- combine modular factors via the 
                    van Hoeij Algorithm

Introduction -- faclib::knapsack(f,mf,p,a) will try to combine
                the modular factors in mf of f which were
                determined up to accuracy p^a to find the
                proper rational factors of f.
 
Call -- faclib::knapsack(f, mf, p, a <, GetBound>)

Parameters -- f  - a squarefree, univariate polynomial
                   with integer coefficients.
              mf - a list of modular factors
              p  - a prime number
              a  - a positive integer - the accuracy up to which 
                   the modular factors are determined, i.e.
                   f = _mult(op(mf)) mod p^a

Options: -- GetBound - if GetBound is specified, faclib::knapsack
                       will return an integer a such that
                       p^a will be a sufficiently high power of
                       p for the algorithm to run. It is called
                       by faclift::plift to determine how far to
                       lift.

Return value -- either FAIL, which will mean that additional 
                lifting is necessary or a list L of polynomials
                over Expr such that f = _mult(op(L)).
                
Description -- for a detailed description see 
               Marc van Hoeij: "Factoring Polynomials and the 
               Knapsack problem", to appear.
                              
               It is used only if there are many (> 16) modular
               factors left after the lifting process.
               
               The algorithm employs lattice reduction on
               truncated traces of the modular factors.
               Empirically it is much faster than the original
               LLL approach and -- for more than about 12 factors
               -- the Zassenhaus factorcombination.
               
               It factors S-D-polynomials reasonably fast, but 
               should be used only if there are 'many' modular
               factors left. It is called from faclib::plift.
-------------------------------------------------------------------*/

faclib::knapsack := proc(F : DOM_POLY, 
                        MF : DOM_LIST, 
                        p : DOM_INT, 
                        a : DOM_INT) 
local N, d, D, n, C, M, st, r, k, tmp, tmp2, rf, need_ab,
     new_f, B, L, Lred, Lgso, i, j, l, ai, bi, T, bnd, flc,
     f, mf, min_cutsize, inc_cutsize, act_cutsize, min_traces,
     inc_traces, act_traces, rfs, A, nA, Ai, Bi, ST, 
     rB, SmartHeuristic, e2t;
save SEED;
begin
  ST := time();
  SmartHeuristic := TRUE;
  userinfo(1, "Knapsack\t-algorithm called...");
  userinfo(9, "With ".expr2text(nops(MF))." modular factors, p^a=".
    expr2text(p)."^".expr2text(a)." deg f=".expr2text(degree(F)));
    
  //-----------------------------------------------------
  // Further checks on the arguments
  //-----------------------------------------------------
  if testargs() then
    // nothing necessary
  end_if;
    
  //-----------------------------------------------------
  // At first we will do some initializations
  //-----------------------------------------------------
  // needed functions
 
  e2t := expr2text; // for simplicity 
		      
  // other values, mainly independent of f
  f := F; mf := MF; // to avoid warnings.    
  new_f := TRUE; // need initialization of N, D,...
  need_ab := TRUE; // need to calculate new cut-borders
  n := nops(mf); // the number of modular factors
  rfs := 0; // the number of identified rational factors
  rf := []; // the already found rational factors 

  // Constants
  min_cutsize := ceil(3*n/log(2,p)); // ai-bi is initially...
  inc_cutsize := ceil(min_cutsize/2); // how much to increase
  act_cutsize := min_cutsize; 
  min_traces := 2; // look at at least so many traces
  inc_traces := 1; // how many traces are to be added
  act_traces := min_traces;

  SEED := 42; // to make the behaviour predictable 
    
  //-----------------------------------------------------
  // We only have to calculate a lifting-bound and exit.
  //-----------------------------------------------------
  if args(0) = 5 and args(5) = hold(GetBound) then
    // faclib::knapsack was called only to get the information
    // how far to lift.
    bnd := polylib::rootbound(f); //, hold(Iterate));
    userinfo(9,"calculated rootbound: ",bnd);
    N := degree(f);
    D := N/2;
    d := min(D, min_traces+4*inc_traces);
    i := ceil(log(p,(lcoeff(f)^d*bnd^(d))*N));
    // p^i is now bigger than N*alpha^(N/2) for
    // any root alpha of f
    j := i + (min_cutsize+4*inc_cutsize);
    // j-i now is min_cutsize+3*inc_cutsize, which should suffice

    userinfo(1,"calculated sufficiently high power: ".p."^".j);
    return(j);
  end_if;
    
  //-----------------------------------------------------
  // the main-loop, in any ending case the loop will
  // be left via a break, therefore I use the infinite
  // loop.
  //-----------------------------------------------------
  while TRUE do    
    
    //-----------------------------------------------------
    // if necessary calculate all constants that depend
    // vitally on f.
    //-----------------------------------------------------
    if new_f then
      N := degree(f);
      D := (N div 2); // If we look at D traces everything will 
                      // be mathematically safe.
      n := nops(mf);  // The number of modular factors
      bnd := polylib::rootbound(f); 
                      // all roots are absolutely less than bnd
      flc := lcoeff(f);

      userinfo(9,"calculated rootbound for f: ".e2t(bnd));
                
      // Remark on B and r:
      // It may happen in the process that we find that
      // in the reduced lattice there are r M-short rows 
      // which do not correspond to factors. Then we will
      // continue with the lattice 'created' from these
      // r rows which are coded by B.
      // Consequently we start with r=n, B=[e_1,..,e_n]
      r := n; // there are at most r ratinal factors
      B := [[_if(i=j,1,0) $j=1..n] $ i=1..n];
      // any factor is coded by one, B is the r,r-unity-matrix
      need_ab := TRUE; // need new cut-bounds
    end_if; // got new polynomial f

    d := min(D,act_traces); // look only at these d traces

    // Remark on C and M:
    // The C is responsible for balanced `short' vectors 
    // All proper factors are 'shorter' than M.
    // M,C need to be computed in every pass, because d may change.
    C := ceil(sqrt(d*n));
    if SmartHeuristic = TRUE then
      M := ceil(sqrt(n + d*(n/2)^2)); // most probably wrong!
    else
      M := ceil(sqrt(C^2*n + d*(n/2)^2));
    end_if;
            
    //-----------------------------------------------------
    // calculate the borders for the cuts.
    // For the sake of safety we will calculate these for
    // all D traces, though we might use far fewer.
    //-----------------------------------------------------
    if need_ab then 
      st := time();
      bi := [ceil(log(p,flc^i*(bnd^i)*N)) $ i=1..D]; // the lower...
      ai := [bi[i]+act_cutsize $ i=1..D]; // the upper cut-border
      for i from 1 to D do
      	if i < d and ai[i] >= a then // oops, need re-lifting
                userinfo(9,"More Lifting necessary, aborting.");
      	  return(FAIL);
      	elif ai[i] <= a then
      	  nA := i;
      	end_if; // ai[i] >= a
      end_for; // i=1..D
      userinfo(9,"ai and bi calculated in time ".e2t(time()-st));
    end_if;
    userinfo(9,"Variables: D=".e2t(D).", d=".e2t(d).", C=".e2t(C).
      ", M=".e2t(M).", act_cutsize=".e2t(act_cutsize)." nA=".e2t(nA));
                
    new_f := need_ab := FALSE;
        
    //-----------------------------------------------------
    // Now set up the sets A that contain all the 
    // information that is to be put into each column of
    // of the lattice.
    //-----------------------------------------------------
    // Remark on A and nA:
    // We use a partition A[1] union .. union A[d] = {1..nA}
    // to compute linear combinations of the traces. In all following
    // comments, the A will be ignored for the sake of simplicity.
    // Our partition A corresponds to the random-matrix A in the
    // paper by van Hoeij.
    nA := min(nA, 3*d);
    A := faclib::randpart({$1..nA}, d);
    d := nops(A);
    assert (nops(A) = d);
    userinfo(99, "Traces will be added: ".e2t(A));
    Bi := [ceil(log(p,_plus(flc^j*(bnd^j)*N $ j in A[i]))) $ i=1..d];
    Ai := [Bi[i] + act_cutsize $ i=1..d];
    
    //-----------------------------------------------------
    // calculate the truncated traces T:
    //
    //            (Tr_j(f[i]) - (Tr_j(f[i]) mods p^bi[i]))
    // T[i][j] = ------------------------------------------
    //                            p^bi[i]
    //                                 mods p^(ai[i]-bi[i])
    // for i=1..n, j=1..D
    //-----------------------------------------------------
    st := time();
    T := [0 $ n];
    for i from 1 to n do
      // calculate traces modulo p^ai[j]
      tmp := [mods (flc^j*polylib::trace(
	      poly(mf[i], op(f,2), IntMod(p^(ai[j]))) , j),
	      p^(ai[j])) $ j=1..nA];
      // make the LEFT-sided cut
      tmp := [(tmp[j]-mods(tmp[j], p^bi[j]))/p^bi[j] 
              $ j=1..nA];
      T[i] := tmp;
    end_for; // i=1..n
    userinfo (9,"T calculated in time ".e2t(time()-st));
                
    //-----------------------------------------------------
    // Now set up the basis for the lattice L, it initially
    // looks like this: (ignoring A!)
    //
    // C            T[1][1]  T[1][2]  ..  T[1][d]
    //    C         T[2][1]  T[2][2]  ..  T[2][d]
    //       ..        ..       ..    ..     ..
    //          C   T[n][1]  T[n][2]  ..  T[n][d]
    //              p^(a1-b1)
    //                       p^(a2-b2)
    //                                ..
    //                                    p^(ad-bd)    
    //
    // if e.g. n=6, d=2, B=[[1,1,0,1,0,0], [0,0,1,0,0,0],
    // [0,0,0,0,1,1]] and SmartHeuristic=FALSE, it will be:
    //
    // C  C     C        T[1,1]+T[2,1]+T[4,1]  T[1,2]+T[2,2]+T[4,2]         
    //       C           T[3,1]                T[3,2]
    //             C  C  T[5,1]+T[6,1]         T[5,2]+T[6,2]
    //                   p^(a1-b1)
    //                                         p^(a2-b2)
    //
    // If SmartHeuristic=TRUE we get
    //
    // 1        T[1,1]+T[2,1]+T[4,1]  T[1,2]+T[2,2]+T[4,2]         
    //    1     T[3,1]                T[3,2]
    //       1  T[5,1]+T[6,1]         T[5,2]+T[6,2]
    //          p^(a1-b1)
    //                                p^(a2-b2)
    //
    // (All empty fields represent zeros.)
    //-----------------------------------------------------
    st := time();
    if SmartHeuristic = TRUE then
      rB := r;
      L := [([ _if(k=i, 1, 0) $ k=1..r ]. 
	     [mods(_plus(B[i][j]*_plus(T[j][l] $ l in A[k])
	      $ j=1..n), p^(Ai[k]-Bi[k])) $ k=1..d
	     ]
	    ) $ i=1..r,
	    [_if(j=i+n, p^(Ai[i]-Bi[i]), 0) $ j=1..r+d] $ i=1..d
	   ];    
    else // SmartHeuristic=FALSE
      L := [([ C*B[i][k] $ k=1..n ]. 
	     [mods(_plus(B[i][j]*_plus(T[j][l] $ l in A[k])
	      $ j=1..n), p^(Ai[k]-Bi[k])) $ k=1..d
	     ]
	    ) $ i=1..r,
	    [_if(j=i+n, p^(Ai[i]-Bi[i]), 0) $ j=1..n+d] $ i=1..d
	   ];    
    end_if; // SmartHeuristic
    userinfo (9,"L calculated in time ".e2t(time()-st));

    // take care that the entries of L are linearly independent
    L:= linalg::basis(map(L, matrix));
    L:= map(L, coerce, DOM_LIST);
    L:= map(L, map, op);
    
    //-----------------------------------------------------
    // let LLL reduce the basis L
    //-----------------------------------------------------
    st := time();
  
    Lred := lllint(L);
    userinfo (9, "Reduced basis L in time  ".e2t(time()-st));
        
    //-----------------------------------------------------
    // orthogonalize basis and calculate the norm-vector,
    // then look which rows of Lred span the space of all
    // M-short vectors.
    // Remark: faclib::myorthog will return ONLY a vector 
    // containing the norms.
    //-----------------------------------------------------
    st := time();
    Lgso := faclib::myorthog(Lred,hold(Float)); // GS-orthogonalize 
    // now get a vector with the row-norm
    userinfo (9, "Orthogonalized basis Lred in time "
            .e2t(time()-st));
    // now look which rows are shorter than M
    // we check for M*1.1 to be sure that float-errors
    // are dealt with.
    r := nops(Lgso);
    while _lazy_and(r>0, Lgso[r]>M*1.1) do r := r-1 end_while;
    // now make sure the other rows are really longer
    while _lazy_and(r <> 0, r<nops(Lgso), Lgso[r+1] < 10*Lgso[r]) do r := r+1 end_while;
    userinfo(9,"There are r=".e2t(r)." 'short' rows"); 

    //-----------------------------------------------------
    // now we will evaluate the results of the lattice-reduction
    // and -- if necessary -- construct the new B
    //-----------------------------------------------------
    if r=0 then
      ///warning ("This should not happen -- stopped earth-rotation.");
      //-----------------------------------------------------
      // something went terribly wrong, we need to start over
      // again.
      //-----------------------------------------------------
      new_f := TRUE;
      need_ab := TRUE;
      SmartHeuristic := FALSE;
      act_cutsize := act_cutsize + inc_cutsize;
      act_traces := act_traces + inc_traces;
      next;
    end_if; // r=0
                        
    //-----------------------------------------------------
    // the polynomial is irreducible
    //-----------------------------------------------------
    if r=1 then
      userinfo(9,"The (remaining) polynomial is irreducible");
      rf := rf.[f];
      rfs := rfs+1;
      break; // go to the final return
    end_if; // r=1
        
    //-----------------------------------------------------
    // we now know that there are at most r rational factors.
    // from the construction we know that r<n/3 since all 3-products
    // where tested during the lifting-process
    //-----------------------------------------------------
    if r>=n then
      //-----------------------------------------------------
      // Obviously we put too little information
      // into the lattice -- we need to 
      // increase the information put into the lattice.
      //-----------------------------------------------------
      userinfo (9, "In the GSO-reduced basis there are ".e2t(r).
                " short vectors -- need to increase the a's!");    
      act_cutsize := act_cutsize + inc_cutsize;
      act_traces := act_traces + inc_traces;
      need_ab := TRUE;
      // These values need to be restored.
      r := n; 
      B := [[_if(i=j,1,0) $j=1..n] $ i=1..r];
      next;
    end_if; // r>=n

    //-----------------------------------------------------
    // OK, there were several short vectors, i.e. some
    // possibly rational factors
    //-----------------------------------------------------
    // Compute the Reduced Rowecholon Form of Lred. Here there
    // should be made some improvements.
    st := time();    
    if SmartHeuristic = TRUE then
      // now reconstruct the `correct' encodings
      tmp := [[Lred[i][j] $ j=1..rB] $ i=1..r];
      B := [[_plus(tmp[i][k] * B[k][j] $ k=1..rB) $ j=1..n] $ i=1..r];               
    else
      B := [[Lred[i][j]/C $ j=1..n] $ i=1..r];
    end_if; // StartHeuristic
		
    //-----------------------------------------------------
    // there are too many short rows, they can't code
    // rational factors.
    // This is tested here because we use 
    //-----------------------------------------------------
    if r > n/4 then // All 3-subsets of 1..n were tested during lifting.
      userinfo (9, "There are too many short vectors.");    
      // get more info into the lattice
      act_traces := act_traces + inc_traces;
      act_cutsize := act_cutsize + inc_cutsize;
      need_ab := TRUE;
      next;
    end_if; // r>n/4
                
    //-----------------------------------------------------
    // Check whether B has the correct form (i.e., exactly
    // one 1 in each column
    //-----------------------------------------------------
    tmp := TRUE;
    tmp2 := [Dom::Multiset(abs(B[i][j]) $ i=1..r) $ j=1..n];
    tmp := TRUE;
    for i in tmp2 do
      if nops(i) <> 2 then
	// there are not only 0s and 1s
	tmp = FALSE;
	break;
      end_if; // nops(i) <> 2
    end_for; // i in tmp2
        
    if tmp <> TRUE then
      userinfo(9,"The reduced form has not the right form.");
      act_traces := act_traces + inc_traces;
      act_cutsize := act_cutsize + inc_cutsize;
      need_ab := TRUE;
      next;
    end_if;
        
    st := time();
    userinfo(9,"Now checking for irreducible factors.");
    // now check, which of these code true factors
    for j from 1 to r do
      // construct the product coded by B[j]
      tmp := poly(_mult(mf[i] $ i in 
			select({$1..n}, x -> (B[j][x] <> 0)))); 
      if tmp <> FAIL then // there were no 'used' factors.

	tmp := faclib::primpart(poly(multcoeffs(tmp,flc),Expr));
	tmp2 := divide(f,tmp,Exact); // trial division

	if tmp2 <> FAIL then

          // Yippie, found rational factor
	  f := tmp2; //use the new f
	  rf := rf.[tmp];
	  rfs := rfs + 1;
	  new_f := TRUE;

          // remove the `used' modular factors
	  // we replace them by FAIL and strip them
          // away in a next step.
	  for i from 1 to n do
	    if B[j][i] <> 0 then
	      mf[i] := FAIL; 
	    end_if;
	  end_for; // i=1..n
	  B[j] := FAIL;
	end_if; // found true factor
      end_if; // 'used' factors?
    end_for; // i=1..r, walk over the possibly rational factors

    //-----------------------------------------------------
    // Now remove the `used' modular factors and modify B
    // in the appropriate way
    // Return if all modular factors are `used up'
    //-----------------------------------------------------
    mf := select(mf, _unequal, FAIL);
    B := select(B, _unequal, FAIL);
    userinfo(9,"Finished Factor-searching for this round in time: "
            .e2t(time()-st));
            
    // Now look whether there is still work to be done
    if nops(B) = 0 or expr(f) = 1 then // nothing left to do.
      break;
    end_if;
                    
    //-----------------------------------------------------
    // if there were non-true factors increase d to get more
    // information into the lattice
    //-----------------------------------------------------
    if (not new_f) then // there were no factors found
      act_cutsize := act_cutsize + inc_cutsize;
      act_traces := act_traces + inc_traces;
      need_ab := TRUE;
    end_if;
        
  end_while; // TRUE
         
  // clear the remember-table of polylib::trace
  // this is an ugly trick, but no proper function is
  // available for that yet.
  sysassign(polylib::trace,subsop(polylib::trace,5=NIL));

  userinfo(1,"Knapsack done, returning ".e2t(rfs)
        ." factors");
  userinfo(9,"Knapsack-algorithm took ".e2t(time()-ST)."ms.");
  return (rf);
end_proc: //  faclib::knapsack


/*-------------------------------------------------------------------

faclib::randpart -- computes a random partition of a set.

Call -- faclib::randpart(S, n)

Parameters -- S - a set of type DOM_SET
              n - a natural number > 1 of
              
Return value -- a list of n sets [S1,..,Sn] such that 
                * _union(S1,..,Sn) = S, and
                * Si intersect Sj = {} if i <> j.
                
Description -- The size of the resulting sets can be estimated to
               be about equal.
               
               in case n <= nops(S) then n=nops(S) is assumed and 
               a list with nops(S) sets with one element each is 
               returned.
-------------------------------------------------------------------*/
                
faclib::randpart := proc(SS : DOM_SET, 
                 n : Type::PosInt)
local i, j, k, m, R, rnd, S;
begin
    S := SS;
    m := nops(S);
    
    // check if the parameters are degenerated.
    if m < n then n := m; end_if; 
    if n = 1 then return([ S ]); end_if;

    rnd := random(2*n);
    S := [ op(S) ];
    R := [{} $ i=1..n];

    for i from 1 to m do
        j := 1 + ((i*n div m) mod n);
        k := 1 + (rnd() mod m-i+1);
        R[j] := R[j] union {S[k]};
        delete S[k];
    end_for;        

    return (R);
end_proc: // faclib::randpart

/*-------------------------------------------------------------------

faclib::myorthog -- compute the Gram-Schmidt-orthogonalization and
                    return the norm of each vector.
 
Call -- faclib::myorthog(S <, Float>)

Parameters -- S  - a list of lists representing a matrix

Options -- Float - when Float is specified, the GSO will be computed
           using floats.

Return value -- a list containing the Euclidean lengths of the
                vectors created by the GSO.
                
Description -- This function is intended ONLY for use in 
               faclib::knapsack.
-------------------------------------------------------------------*/

faclib::myorthog := proc(S) // S = a list of n sublists
local n, m, OS, j, k, ss, s, tmp, normsquares, term, v;
save X;
begin
     if args(0) = 2 and args(2) = hold(Float) then
        S:= map(S, map, float);
     end_if;
     delete (X);
     n := nops(S);
     m := nops(S[1]); // the length of the vectors (=lists)

     // initialize container 'OS' = list of polynomials
     OS := [poly(0, [X]) $ n]; 

     // initialize container 'normsquares' to store the
     // squares norms of the orthogonolized vectors
     normsquares:= [ 0 $ n]; 

     // Gram-Schmidt of input lists S.1, S.2, .. , S.n:
     // b.j = S.j - sum(<S.j, b.k>/<b.k,b.k>*b.k,  k=1..j-1)
     // We use polynomials OS.k = b.k / <b.k, b.k> for speed
     // to compute the linear combinations of the vectors.
     // The polynomial OS.k and the list b.k are kept simultaneously.
     // The orthogonalized b.k is again stored in S.k!
     // Thus, the recursion is
     // a) OS.j = convert S.j (= list) to poly
     // b) OS.j = OS.j - sum(<S.j, S.k>*OS.k,  k=1..j-1)
     // c) S.j = convert OS.j (= poly) to list
     // d) OS.j = OS.j / <S.j, S.j> 

     OS[1]:= poly([[S[1][k], k] $ k = 1..m], [hold(_X)]):
     normsquares[1]:=  _plus(op(map(S[1], _power, 2)));
     if not iszero(normsquares[1]) then
        OS[1]:= mapcoeffs(OS[1], _mult, 1/normsquares[1]);
     end_if;

     for j from 2 to n do
       // convert list S[j] to polynomial S[j]
       OS[j]:= poly([[S[j][k], k] $ k = 1..m], [hold(_X)]):
       ss:= OS[j]: // ss = poly = S.j - sum(<S.j, b.k>/<b.k,b.k>*b.k, k=1..j-1)
       for k from 1 to j - 1 do
             if iszero(normsquares[k]) then next end_if;
             // scalar product
             s:= _plus(op(zip(S[j], S[k], _mult)));
             if not iszero(s) then
                 ss:= ss - mapcoeffs(OS[k],_mult,s );
             end_if
       end_for;
       OS[j] := ss;
       // reconvert poly to list
       v:= [0 $ m]: 
       tmp:= poly2list(OS[j]):
       for term in tmp do
           v[term[2]]:= term[1];
       end_for;
       S[j]:= v;
       normsquares[j]:=  _plus(op(map(S[j], _power, 2)));
       if not iszero(normsquares[j]) then
          OS[j]:= mapcoeffs(OS[j], _divide, normsquares[j]);
       end_if;
     end_for;
     
     // anyway -- we just need the norms:
     return(map(normsquares, sqrt));
     //return(S);
end_proc: // faclib::myorthog
