/*----------------------------------------------------------------
import::csv - reads CSV files

Calling sequence: 
        import::csv(filename <, separator> <, NonNested> <, Trim> <, DecimalComma>)

Parameter: filename  - non-empty string or a positive integer
                       (a file descriptor as returned by fopen)
           separator - optional string , length(separator) = 1
           NonNested - optional name
	   Trim      - optional name
	   DecimalComma - optional name

Summary: import::csv is used to read CSV [1] data files produced by 
         external programs, like Microsoft Excel. CSV is an ASCII [2] 
	 based file format.

	 The CSV file format is a tabular data format that has fields 
	 separated by the comma character. 
	 Note: Some local versions of Microsoft Excel use semicolons 
	       instead of commas! 

	 [1] Comma Separated Values or Character Separated Values,
             as per RfC 4180
	 [2] American Standard Code for Information Interchange

         In contrast to finput, the data must not be ended by a colon 
         or semicolon. Data separated by 'separator' are interpreted 
         as single data items. The default separator is a comma.

	 Empty lines are ignored.

         Data items that cannot be converted to a valid MuPAD number
         are imported as MuPAD strings.

	 import::csv tries to convert a number contained in the CSV file 
	 to a valid MuPAD number. For example: 1,234.56 or 1 234.56 
	 are converted to the MuPAD number 1234.56. The German format 
	 of numbers is different. Instead of a decimal point a decimal 
	 comma is used. For example: 1234,56 or 1.234,56 are converted 
	 to 1234.56 .
	 Note: A comma as CSV separator doesn't make sense if the comma
	 is used to separate thousands in a number or the decimal comma 
	 is used. A semicolon should be used as separator.

	 The default format of numbers is the US format (decimal point).
 	 With DecimalComma, import::csv expects the German format of numbers. 
	 Note: All numbers contained in the CSV file must comply with
	 either the US or the German format, mixed formats cannot be
 	 converted.

         With NonNested, the result will be a list containing all data. 
         Otherwise, the result is a list of lists, each 'inner' list
         representing a line of the CSV file. 

	 With Trim leading and trailing blanks in strings are removed.
   
         If the file is specified by a string, the corresponding
         file is opened and closed, automatically.
         If the user has opened a text file in Read mode and passes
         the file descriptor to import::csv, the file remains
         open and needs to be closed by the user.


Examples:

---------------------------------------------------
File data: a ,12.5
             a-b   ,1234.56 
          
>> import::csv("data");
              [ ["a ", 12.5], ["  a-b   ", 1234.56] ]
>> import::csv("data", Trim);
              [ ["a", 12.5], ["a-b", 1234.56] ]
>> import::csv("data", NonNested);
              ["a ", 12.5, "  a-b   ", 1234.56]
---------------------------------------------------
File data: a ;12.5
           a-b;1,234.56 
	   a b; -12345.6789E-02
          
>> import::csv("data", ";");
              [ ["a ", 12.5], ["a-b", 1234.56], ["a b", -123.456789] ]
---------------------------------------------------
File data: abc;12,5
           a-b;1.234,56 
	   a b; -12345.6789E-02
          
>> import::csv("data", ";", DecimalComma);
              [ ["abc", 12.5], ["a-b", 1234.56], ["a b", -123.456789] ]



See also: import::readdata, read, finput, fread, ftextinput, 
          text2expr, text2list

---------------------------------------------------------------------*/

// Parser aliases used by import::csv.
//
// Default field separator, used when the caller passes no separator
// string. The spelling "Seperator" is part of the alias name that
// import::csv refers to, so it must not be changed here alone.
alias(Default_CSV_Seperator = ","):
// is_digit(s): asks stringlib::contains whether s occurs in "0123456789";
// called with single characters by check_number.
alias(is_digit(s) = stringlib::contains("0123456789", s)):
// is_sign(s): asks stringlib::contains whether s occurs in "+-";
// called with single characters by check_number.
alias(is_sign(s)  = stringlib::contains("+-", s)):

// import::csv -- read a CSV file and return its contents as a list.
//
// Parameters:
//   filename  - non-empty string (name of the file) or positive integer
//               (file descriptor of a file the caller has opened)
//   separator - optional one-character string, default is
//               Default_CSV_Seperator
//   Arguments 2..5 may also be the option names NonNested, Trim and
//   DecimalComma, in any order.
//
// Returns:
//   A list with one inner list per non-empty line of the file, or, with
//   NonNested, one flat list of all items. Items that can be read as real
//   numbers are returned as numbers, all other items as strings.
//
// Errors:
//   Raised for a missing/illegal file specification, illegal options,
//   a file that cannot be opened, and a quoted entry that is still open
//   at the end of the file.
//
// A file given by name is opened and closed here; a file given by
// descriptor is left open.
import::csv := proc(filename, separator = Default_CSV_Seperator)
local check_number, csv2mu, dec_sep, fd, isf, line, 
      nested, p, r, t, j, trim, trim_string, idx, entry, sep_re;
begin
   //---------------------------------------------------
   //                Local procedures
   //---------------------------------------------------
   
   // csv2mu - convert one raw field x into a number or a string.
   // dec_sep is the decimal separator handed on to check_number;
   // with trim = TRUE, leading and trailing blanks of string results
   // are removed.
   csv2mu := proc(x : DOM_STRING, dec_sep : DOM_STRING, trim : DOM_BOOL)
      local tmp;
   begin
      if traperror((tmp:= text2expr(x))) = 0 then
         // Only a single parsed object can be a number; skip the type
         // test when the parser produced a sequence (e.g. "1,234.56")
         // or nothing at all (empty field).
         if nops([tmp]) = 1 and testtype(tmp, Type::Real) then
	    return( tmp )
	 end
      end;
      tmp := check_number(x, dec_sep);
      if trim and testtype(tmp, DOM_STRING) then 
         tmp := trim_string(tmp) 
      end;
      return( tmp )
   end;

   // trim_string - delete leading and trailing blanks of s

   trim_string := proc(s : DOM_STRING)
    local i, l;
   begin
    // strip leading blanks
    i := 1; l := length(s);
    while i <= l and s[i] = " " do i := i + 1 end;
    if i > 1 then
       s[1..i-1] := ""
    end;
    // strip trailing blanks
    l := length(s); i := l;
    while i >= 1 and s[i] = " " do i := i - 1 end;
    if i < l then
       s[i+1..l] := ""
    end;
    return( s )
   end:


   // Try to convert the string s to a number
   // s - a string
   // decimal_seperator  = "," or "."
   //
   // Blanks and the thousands separator (the respective other one of
   // "," and ".") are dropped from the integer part; the decimal
   // separator is replaced by "." before the text is parsed.
   //
   // Returns the string s if s cannot be converted
   // to a number, otherwise a number (integer or float)
   // is returned.
   //
   // Examples:
   //
   // check_number("  12.34", ".")  -> 12.34 
   // check_number("1,4", ",")      -> 1.4
   // check_number("1234.45", ".")  -> 1234.45
   // check_number("1,234.45", ".") -> 1234.45
   // check_number("1 234.45", ".") -> 1234.45
   // check_number("1.234,45", ",") -> 1234.45

   check_number := proc(s : DOM_STRING, decimal_seperator = "." : DOM_STRING)
    local i, l, t, thousand_seperator;
   begin
    // skip leading blanks; the first other character must be a digit
    // or a sign
    i := 1; l := length(s);
    while i <= l and s[i] = " " do i := i + 1 end;
    if i > l or not (is_digit(s[i]) or is_sign(s[i])) then
       return(s)
    else
       t := s[i]; 
       i := i + 1;
       assert(decimal_seperator = "." or decimal_seperator = ",");
       if decimal_seperator = "." then
          thousand_seperator := ","
       else
          thousand_seperator := "."
       end;
    end;
    // collect the digits in front of the decimal separator
    while i <= l and s[i] <> decimal_seperator do
      if s[i] = " " or s[i] = thousand_seperator then
         i := i + 1
      elif is_digit(s[i]) then
         t := t . s[i];
         i := i + 1
      else
         return(s)
      end;
    end;
    // a decimal separator was found at position i: append "." and the
    // rest of s (or "0" if nothing follows)
    if i <= l then
       t := t . ".";
       if i = l then
          t := t . "0"
       else
         t := t . s[i+1..l]
       end
    end;
    // accept the result only if it parses to an integer or a float
    if traperror((t:=text2expr(t))) = 0 and
       (domtype(t) = DOM_INT or domtype(t) = DOM_FLOAT) then
       return( t )
    else
       return( s )
    end
   end:

   //---------------------------------------------------
   //                  Main program
   //---------------------------------------------------
   if args(0) = 0 then
      error("Expecting at least 1 argument");
   end_if:
   if args(0) > 5 then
      error("Expecting no more than 5 arguments");
   end_if:
   //-----------------------------
   // check the file specification
   //-----------------------------
   case domtype(filename) 
   of DOM_STRING do
      if length(filename) = 0 then
         error("Empty filename")
      end_if:
      break;
   of DOM_INT do
      if filename <= 0 then
         error("Expecting the file descriptor as a positive ".
               " integer, got ".expr2text(filename));
      end_if:
      break;
   otherwise
      error("Illegal file specification. Expecting a string or ".
            " a file descriptor (a positive integer). Got: ".expr2text(filename));
   end_case;
   //-----------------------------
   // check the options
   //-----------------------------
   // The branches below contain no 'break' on purpose: a call with
   // n arguments checks argument n and then continues with the
   // branches for n-1, ..., 2.
   nested := TRUE; trim := FALSE; dec_sep := ".";
   case args(0)
     of 5 do
        if args(5) = hold(DecimalComma) then
           dec_sep := ","
	elif args(5) = hold(Trim) then
           trim := TRUE
	elif args(5) = hold(NonNested) then
             nested := FALSE
	else error("Illegal 5th argument. Expecting 'NonNested', 'Trim' or 'DecimalComma', got :".
                   expr2text(args(5)))
        end_if;	 
     of 4 do
        if args(4) = hold(DecimalComma) then
           dec_sep := ","
        elif args(4) = hold(Trim) then
           trim := TRUE
	elif args(4) = hold(NonNested) then
             nested := FALSE
	else error("Illegal 4th argument. Expecting 'NonNested', 'Trim' or 'DecimalComma', got :".
                   expr2text(args(4)))
        end_if;	   
     of 3 do 
        if args(3) = hold(DecimalComma) then
           dec_sep := ","
        elif args(3) = hold(Trim) then
           trim := TRUE
        elif args(3) = hold(NonNested) then
             nested := FALSE
        else error("Illegal 3rd argument. Expecting 'NonNested', 'Trim' or 'DecimalComma', got :".
                   expr2text(args(3)))
        end_if;
     of 2 do 
        // the 2nd argument is either an option or the separator
        if separator = hold(DecimalComma) then
           dec_sep := ","
        elif separator = hold(Trim) then
           trim := TRUE
        elif separator = hold(NonNested) then
              nested := FALSE
        elif not (testtype(separator, DOM_STRING) and 
           length(separator) = 1) then
              error("Illegal 2nd argument. Expecting separator, 'NonNested', 'Trim' or 'DecimalComma', got :".
                expr2text(args(2)))
        end_if
   end_case;
   //-----------------------------
   // try to open the file
   //-----------------------------
   if domtype(filename) = DOM_INT then
        // we have to assume that the user has opened
        // a file via fopen and passed the file descriptor
        // to import::csv;
        fd := filename;
        // we rely on later calls to ftextinput to produce
        // appropriate error messages if the file descriptor
        // fd = filename specified by the user does not point
        // to a properly opened file
   else // The user passed a file name
        //---------------------------------------------------
        // search: 1) using READPATH
        //         2) as direct path
        //         3) using LIBPATH
        //---------------------------------------------------
        // isf: the name does not start with the path separator, so a
        // separator is inserted between search directory and name
        isf:= bool(filename[1] <> stdlib::PathSep);
        fd:= FAIL;
        //--------------------------------
        // 1) try to open with READPATH
        //--------------------------------
        if domtype(READPATH) <> DOM_IDENT then
           for p in READPATH do
               if isf then
                  if p[-1] <> stdlib::PathSep then
                     p:= p.stdlib::PathSep
                  end_if
               end_if;
               fd := fopen(p.filename, Read, Text);
               if fd <> FAIL then
                  break;
               end_if;
           end_for;
        end_if;
        //--------------------------------
        // 2) try to open with direct path
        //--------------------------------
        if fd = FAIL then
           fd := fopen(filename, Read, Text);
        end_if;
        //--------------------------------
        // 3) try to open with LIBPATH
        //--------------------------------
        if fd = FAIL then
           for p in LIBPATH do
               if isf then
                   if p[-1] <> stdlib::PathSep then
                       p:= p.stdlib::PathSep
                   end_if
               end_if;
               fd := fopen(p.filename, Read, Text);
               if fd <> FAIL then
                  break;
               end_if;
           end_for;
        end_if;
        //--------------------------------
        // 4) give up
        //--------------------------------
        if fd = FAIL then
           error("Cannot open file ".filename);
        end_if;
   end_if;

   //-------------------------------------------------------
   // Now, the file is opened and specified by the
   // file descriptor fd
   //-------------------------------------------------------

   //-----------------------------
   // read the data
   //-----------------------------
   // the 2nd argument was an option, not a separator
   if not testtype(separator, DOM_STRING) then
      separator := Default_CSV_Seperator
   end_if;
   // The separator is spliced into regular expressions below. Escape
   // it with a backslash if it is a regular expression metacharacter
   // (e.g. "|" or "."), so that it is always matched literally.
   if stringlib::contains("\\^$.|?*+()[]{}-", separator) then
      sep_re := "\\".separator
   else
      sep_re := separator
   end_if;
   r := table(): // a table of rows, indexed
   j := 1;       // via j = 1, 2, ...
  while type((line:=ftextinput(fd))) <> DOM_NULL do
     // remove trailing \r to handle windows line endings under linux
     if line<>"" and line[-1]="\r" then line := line[1..-2]; end_if;
     // skip empty lines
     if line="" then next; end_if;    
     t := [];
     while TRUE do // to allow multi-line entries
       // Cut off leading fields that are terminated by a separator:
       // either a quoted field (with "" standing for a quote) or a
       // field without quotes and separators.
       while ((idx := strmatch(line, 
               "^(\"(?:[^\"]|\"\")*\"".sep_re.
               "|[^".sep_re."\"]*".sep_re.")", Index))) <> FALSE do
         // idx[2] is the position of the terminating separator
         if idx[2] > 1 then
           entry := line[1..idx[2]-1];
         else
           entry := "";
         end_if;
         line := line[idx[2]+1..-1];
         if length(entry) > 0 and entry[1] = "\"" then
           // strip the enclosing quotes and unescape doubled quotes
           entry := stringlib::subs(entry[2..-2], "\"\""="\"");
         end_if;
         t := t.[entry];
       end_while;
       if length(line) > 0 and line[1] = "\"" then // multiline entry?
         if strmatch(line, "^\"(?:[^\"]|\"\")*\"$") then
           // no, just a quoted last entry
           t := t.[stringlib::subs(line[2..-2], "\"\""="\"")];
           break;
         elif strmatch(line, "^\"(?:[^\"]|\"\")*\"") then
           // a complete quoted string followed by further text
           warning("illegally quoted entry ".
                   strmatch(line, "^\"(?:[^\"]|\"\")*\"", All)[1]);
           // take everything up to the next separator or the end of
           // the line as one entry
           idx := strmatch(line, 
               "[^".sep_re."\"]*(?:".sep_re."|$)", Index);
           entry := line[1..idx[2]];
           line := line[idx[2]+1..-1];
           if entry[-1] = separator then
             // The entry was terminated by a separator: drop the
             // separator and continue splitting the rest of this
             // line. No further line is read from the file.
             t := t.[entry[1..-2]];
             next;
           end_if;
           // the entry extends to the end of the line
           if entry[-1] = "\"" then
             entry := stringlib::subs(entry[2..-2], "\"\""="\"");
           end_if;
           t := t.[entry];           
           break;
         end_if;
         // multiline: the quoted entry is still open, append the
         // next line of the file
         entry := ftextinput(fd);
         if type(entry) = DOM_NULL then
           // do not leave a file open that was opened here
           if domtype(filename) = DOM_STRING then
              fclose(fd);
           end_if;
           error("end of file within quoted string");
         end_if;
         // remove trailing \r to handle windows line endings under linux
         if entry<>"" and entry[-1]="\r" then entry:= entry[1..-2]; end_if;
         line := line."\n".entry;
       else
         // last field is not quoted, therefore not multiline
         t := t.[line];
         break;
       end_if;
     end_while;
       
     // now, t = [string1, string2,...]
     // If an item in the line cannot be converted to
     // a real number via text2expr, then leave it as 
     // a string. Do not evaluate the result of text2expr,
     // e.g. eval(text2expr(x)) is unwanted.
     t := map (t, csv2mu, dec_sep, trim);
     if t <> [] then    // skip empty lines
        r[j]:= t;    // add the line to the row table
        j:= j+1:        // increase the row counter
     end_if;
   end_while;
   // turn the table of rows into a list of rows
   r := [r[j] $ j = 1..nops(r)]:
   if (not nested) then 
      // flatten the list r
      r := map(r, op):
   end_if;
   // close the file only if it was opened here
   if domtype(filename) = DOM_STRING then 
      fclose(fd);
   end_if:
   return(r);
end_proc:
