--------------------------BEGIN-VHDL-LICENSE-----------------------------
-- fasu.vhdl - Floating Point Add/Sub Execution Unit for the F-CPU
-- Copyright (C) 2003, 2004 -- SEMET Gaetan <gaetan@xeberon.net>

-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
---------------------------END-VHDL-LICENSE------------------------------
-- version Thu Dec 30 2003
-- version Mon Jan  5 2004
--
-- since i use some Michael Riepe code, and because he helped me a lot ;)
-- i'll put its copyright:
-- Copyright (C) 2000, 2001, 2003 Michael Riepe <michael@stud.uni-hannover.de>


library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.ALL;
use IEEE.std_logic_unsigned.ALL;
               
use work.FCPU_config.all;
use work.generic_adder.all;
use work.Bit_Manipulation.all;

entity EU_FASU is
  generic (
    -- width of the operand/result ports in bits; the assertion at the end
    -- of this entity requires a multiple of 64
    WIDTH : natural := 64
  );

  port(
  -- inputs : 
    -- operands: WIDTH bits holding packed 32-bit or 64-bit floats,
    -- depending on the SIMD input
    Din_0    : in std_ulogic_vector(WIDTH-1 downto 0);
    Din_1    : in std_ulogic_vector(WIDTH-1 downto 0);
    -- subtract flag (should be derived from opcode)
    -- 0 for fadd, 1 for fsub
    Substract : in std_ulogic;
    -- flag bits (directly copied from instruction word)
--    Flags    : in std_ulogic_vector(23 downto 18);
    ieee_flag: in std_ulogic; -- 0 : ieee flag not set, 1: ieee flag set
    -- SIMD mode bits (not decoded)
    -- any value other than "00" or "01" makes stage 1 report a failure
    SIMD     : in std_ulogic_vector(1 downto 0); -- 00 32 bit FP, 01 64 bit FP
    -- clock/reset/enable inputs
    -- Clk : the pipeline registers are updated on the rising edge
    -- Rst : active high, asynchronous (tested outside the clock edge)
    -- En  : sampled on the rising clock edge; starts a new operation
    Clk      : in std_ulogic;
    Rst      : in std_ulogic;
    En       : in std_ulogic;

  -- outputs:
    -- Result: WIDTH bits, same packing as the operands
    Dout_0   : out std_ulogic_vector(WIDTH-1 downto 0);
    -- FPU Exception Flags
    FExout   : out std_ulogic_vector(8 downto 0)
    -- FExout(0) : DBZ  : Divide By Zero Detected
    -- FExout(1) : IOD  : Illegal Operand Detected
    -- FExout(2) : EUD  : Exponent Underflow Detected
    -- FExout(3) : EOD  : Exponent Overflow Detected
    -- FExout(4) : SNAN : Signaling Not A Number output
    -- FExout(5) : QNAN : Quiet Not A Number output
    -- FExout(6) : INF  : Infinite output
    -- FExout(7) : INE  : Inexact output
    -- FExout(8) : ZERO : Zero output 
  );
-- elaboration-time sanity check on WIDTH, hidden from synthesis by the
-- pragmas below
--pragma synthesis_off
begin
  assert (WIDTH >= 64) and (WIDTH mod 64 = 0)
    report "width of FASU must be an integer multiple of 64"
    severity failure;
--pragma synthesis_on
end EU_FASU;


-- Outputs:
--                +---------------------------------------------------+
--  Addition      |                    B                              |
--   A + B        +----------+----------+----------+---------+--------+
--                |    F     |  -infty  |  +infty  |  QNaN   |  SNaN  |
--+----+----------+----------+----------+----------+---------+--------+
--|    | F        |    F     |  -infty  |  +infty  |  QNaN   |  SNaN  |
--|    | -infty   |  -infty  |  -infty  |   QNaN   |  QNaN   |  SNaN  |
--| A  | +infty   |  +infty  |   QNaN   |  +infty  |  QNaN   |  SNaN  |
--|    | QNaN     |   QNaN   |   QNaN   |   QNaN   |  QNaN   |  SNaN  |
--|    | SNaN     |   SNaN   |   SNaN   |   SNaN   |  SNaN   |  SNaN  |
--+----+----------+----------+----------+----------+---------+--------+
--
--                +---------------------------------------------------+
--  Substraction  |                    B                              |
--   A - B        +----------+----------+----------+---------+--------+
--                |    F     |  -infty  |  +infty  |  QNaN   |  SNaN  |
--+----+----------+----------+----------+----------+---------+--------+
--|    | F        |    F     |  +infty  |  -infty  |  QNaN   |  SNaN  |
--| A  | -infty   |  -infty  |   QNaN   |  -infty  |  QNaN   |  SNaN  |
--|    | +infty   |  +infty  |  +infty  |   QNaN   |  QNaN   |  SNaN  |
--|    | QNaN     |   QNaN   |   QNaN   |   QNaN   |  QNaN   |  SNaN  |
--|    | SNaN     |   SNaN   |   SNaN   |   SNaN   |  SNaN   |  SNaN  |
--+----+----------+----------+----------+----------+---------+--------+
--
-- F : any finite floating value, ie except {S,Q}NaN, +/-infinity

-- Note FPU doesn't generate Signaling Not A Number (SNaN) except if
-- a SNaN was given in one operand.
-- Rem: IEEE wants a SNAN to be generated when overflow occurs, in place of
-- a QNAN
-- QNAN is generated when an illegal operation occurs
-- Rem : FPU will not output -0 but +0, even if two -0 were given.


-- Operating mode:
--   Single: Float    : 32 bits
--           Mantissa : 23 bits
--           Exponent :  8 bits
--           Sign     :  1 bit
--   Double: Float    : 64 bits
--           Mantissa : 52 bits
--           Exponent : 11 bits
--           Sign     :  1 bit
-- SIMD Modes:
--            00 : 32 bit mode (float)
--            01 : 64 bit mode (double)
--            any other combinaison is invalid


architecture Behav_1 of EU_FASU is

  -- COMPONENTS



  -- CONSTANTS
  
  
  -- single/double mantissa and exponent sizes, in bits
  -- (xxx_SIZE is the whole float, xxx_M_SIZE the stored mantissa without
  --  the hidden bit, xxx_E_SIZE the exponent)
  constant  SGL_SIZE  : natural := 32;
  constant  SGL_M_SIZE : natural := 23;
  constant  SGL_E_SIZE : natural := 8;
  constant  DBL_SIZE  : natural := 64;
  constant  DBL_M_SIZE : natural := 52;
  constant  DBL_E_SIZE : natural := 11;
  -- signs are always 1 bit long...

  -- number of 64 bit blocks by word
  constant  BLOCK64_NBR : natural := WIDTH/64;

  -- single/double mantissa and exponent positions in the different SIMD modes
  -- (bit indices inside one float, bit 0 being the mantissa LSB)
  -- SIMD 32 bit mode
  constant SGL_M_Start : natural := 0;
  constant SGL_M_End   : natural := 22;
  constant SGL_E_Start : natural := 23;
  constant SGL_E_End   : natural := 30;
  constant SGL_S_Pos   : natural := 31;
  -- SIMD 64 bit mode : 
  constant DBL_M_Start : natural := 0;
  constant DBL_M_End   : natural := 51;
  constant DBL_E_Start : natural := 52;
  constant DBL_E_End   : natural := 62;
  constant DBL_S_Pos   : natural := 63;
  -- TODO: don't know VHDL syntax for having Fa(SGL2_M_POS) with 
  -- SGL2_M_POS = "SGL2_M_End downto SGL2_M_Start"
  -- (a named range can be declared as a subtype and used as a slice, e.g.
  --    subtype SGL_M_Range is natural range SGL_M_End downto SGL_M_Start;
  --    ... Fa(SGL_M_Range) ...)
  


  -- indices for Super Mode and Mode vectors
  -- The Super Mode vector is shared by all operands of one instruction;
  -- there is one Mode vector per operand pair.
  constant Super_Mode_IEEE  : natural :=  0; -- 0: IEEE exception flag not set
                                             -- 1: IEEE exception flag set
  constant Super_Mode_Sub   : natural :=  1; -- 0: add, 1: subtraction
  constant Super_Mode_SIMD0 : natural :=  2; -- 0 : 32 bit SIMD (single)
                                             -- 1 : 64 bit SIMD (double)
  constant Super_Mode_SIMD1 : natural :=  3; -- reserved for further 
                                             -- floating formats
  
  constant SUPER_MODE_VECTOR_SIZE : natural := 4;  -- Super Mode vector size
  constant MODE_VECTOR_SIZE       : natural := 7;  -- Mode vector size
  
  constant Mode_EX0  : natural :=  0; -- exceptions indicator bit 0
  constant Mode_EX1  : natural :=  1; -- exceptions indicator bit 1
  constant Mode_EX2  : natural :=  2; -- exceptions indicator bit 2
             -- 000 : no exception occured
             -- 100 : DBZ
             -- 101 : IOD
             -- 110 : EUD
             -- 111 : EOD
  constant Mode_SO0  : natural :=  3; -- Special Output indicator bit 0
  constant Mode_SO1  : natural :=  4; -- Special Output indicator bit 1
  constant Mode_SO2  : natural :=  5; -- Special Output indicator bit 2
             -- 000 : no special output
             -- 001 : output is in Fa vector
             -- 010 : SNaN
             -- 011 : QNaN
             -- 100 : +inf
             -- 101 : -inf
             -- 110 : +0
             
             -- IMPORTANT: if Mode_SpecialOutput is set to Fb (Mode_SO_Fb),
             -- the result sign will be not the sign of Fb if we are
             -- in substration instruction => the last stage will have
             -- to invert the sign bit of Fb
             -- NOTE(review): no Mode_SO_Fb value is declared below (only
             -- Mode_SO_Fa exists); confirm how a "result is Fb" case is
             -- meant to be encoded.

  constant Mode_DONE : natural := 6;  -- finished calculation flag
                                      -- if Mode_EX != "000" then result is 
                                      -- an exception (NaN, inf,...)
                                      -- or the result is in Fout vector
                                      -- (in the current stage)
  -- TODO: rounding modes

  -- Mode sub-vectors values
  -- values of the two SIMD bits of the Super Mode vector
  constant Mode_SIMD_Single  : std_ulogic_vector(1 downto 0) := "00";
  constant Mode_SIMD_Double : std_ulogic_vector(1 downto 0) := "01";

  -- values of the three Mode_EX bits (same codes as the table above)
  constant Mode_Ex_None     : std_ulogic_vector(2 downto 0) := "000";
  constant Mode_Ex_DBZ      : std_ulogic_vector(2 downto 0) := "100";
  constant Mode_Ex_IOD      : std_ulogic_vector(2 downto 0) := "101";
  constant Mode_Ex_EUD      : std_ulogic_vector(2 downto 0) := "110";
  constant Mode_Ex_EOD      : std_ulogic_vector(2 downto 0) := "111";
  
  -- values of the three Mode_SO bits (same codes as the table above)
  constant Mode_SO_None     : std_ulogic_vector(2 downto 0) := "000";
  constant Mode_SO_Fa       : std_ulogic_vector(2 downto 0) := "001";
  constant Mode_SO_SNaN     : std_ulogic_vector(2 downto 0) := "010";
  constant Mode_SO_QNaN     : std_ulogic_vector(2 downto 0) := "011";
  constant Mode_SO_INF      : std_ulogic_vector(2 downto 0) := "100";
  constant Mode_SO_MINF     : std_ulogic_vector(2 downto 0) := "101";
  constant Mode_SO_ZERO     : std_ulogic_vector(2 downto 0) := "110";


  -- indices of the Repres representation vector
  -- (for easy recognition of input type through the pipeline)
  -- Exactly one of bits 0..4 is set by stage 1 for each operand.
  constant REPRES_VECTOR_SIZE         : natural := 6;
  
  constant REPRES_NORMALISED   : natural := 0;
  constant REPRES_DENORMALISED : natural := 1;
  constant REPRES_INFTY        : natural := 2;
  constant REPRES_ZERO         : natural := 3;
  constant REPRES_NAN          : natural := 4;
  -- note: the MSB of the Repres vector is the "sign", ie +infty or -infty,...
  -- SNaN => MSB=0
  -- QNaN => MSB=1
  -- (stage 1 copies the operand's sign bit for zero and infinity, the top
  --  mantissa bit for NaN, and leaves the MSB at '0' for normalised and
  --  denormalised operands)
  constant REPRES_MSB          : natural := 5;

  -- indices for FExout exception output vector
  constant Ex_DBZ  : natural := 0;  --  Divide By Zero Detected
  constant Ex_IOD  : natural := 1;  --  Illegal Operand Detected
  constant Ex_EUD  : natural := 2;  --  Exponent Underflow Detected
  constant Ex_EOD  : natural := 3;  --  Exponent Overflow Detected
  constant Ex_SNAN : natural := 4;  --  Signaling Not A Number output
  constant Ex_QNAN : natural := 5;  --  Quiet Not A Number output
  constant Ex_INF  : natural := 6;  --  Infinite output
  constant Ex_INE  : natural := 7;  --  Inexact output
  constant Ex_ZERO : natural := 8;  --  Zero output 


  -- special types
  type t_float is (t_single, t_double);

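  -- special floating point values for outputs
  -- NOTE: these constants are declared without an index constraint, so each
  -- one takes an ascending range starting at 0 (index 0 is the leftmost
  -- character, i.e. the sign bit). Assign them as whole vectors; do not
  -- index them with the xxx_S_Pos / xxx_E_* / xxx_M_* positions.
  -- single floats (32 bit)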
  constant const32_infty   : std_ulogic_vector    
                           := "01111111100000000000000000000000"; --  +infinity
  constant const32_m_infty : std_ulogic_vector    
                           := "11111111100000000000000000000000"; --  -infinity
  constant const32_SNaN    : std_ulogic_vector    
                           := "01111111100000000000000000000001"; --    SNaN
  constant const32_QNaN    : std_ulogic_vector    
                           := "01111111110000000000000000000001"; --    QNaN
  constant const32_0       : std_ulogic_vector    
                           := "00000000000000000000000000000000"; --       0
  constant const32_m_0     : std_ulogic_vector    
                           := "10000000000000000000000000000000"; --      -0
  
  -- double floats (64 bit)
  constant const64_infty   : std_ulogic_vector                    --  +infinity
        := "0111111111110000000000000000000000000000000000000000000000000000";
  constant const64_m_infty : std_ulogic_vector                    --  -infinity
        := "1111111111110000000000000000000000000000000000000000000000000000";
  constant const64_SNaN    : std_ulogic_vector                    --    SNaN
        := "0111111111110000000000000000000000000000000000000000000000000001"; 
  constant const64_QNaN    : std_ulogic_vector                    --    QNaN
        := "0111111111111000000000000000000000000000000000000000000000000001";
  constant const64_0       : std_ulogic_vector                    --       0
        := "0000000000000000000000000000000000000000000000000000000000000000";
  constant const64_m_0     : std_ulogic_vector                    --      -0
        := "1000000000000000000000000000000000000000000000000000000000000000"; 
  
  

  -- SIGNALS
  
  -- note : i use Symphony Sonata EDA Evaluation version for main programming
  -- and this version does not allow to waveform variable so I put lot of
  -- variables in signals...
  
  
  -- general signals
  -- Result vector; it drives the WIDTH-bit port Dout_0 directly, so it must
  -- be exactly WIDTH bits wide. (It was BLOCK64_NBR*WIDTH bits, which only
  -- equals WIDTH when WIDTH = 64 and is a length mismatch otherwise.)
  signal Fout : std_ulogic_vector (WIDTH-1 downto 0);



  -- stage 1 inputs registers
  signal S1Enable     : std_logic;
  signal S1SuperMode  : std_ulogic_vector(SUPER_MODE_VECTOR_SIZE-1 downto 0);
  signal S1Modes      : std_ulogic_vector(2*BLOCK64_NBR*MODE_VECTOR_SIZE-1 
                                          downto 0);
        -- if operating in 32 bit SIMD mode, then we have twice more 
        -- computations to do...
  -- Operand copies, driven from the WIDTH-bit ports Din_0 / Din_1; same
  -- width correction as for Fout. Stage 1 reads them in SIZE-bit slices
  -- whose highest index is WIDTH-1.
  signal S1Fa         : std_ulogic_vector (WIDTH-1 downto 0);
  signal S1Fb         : std_ulogic_vector (WIDTH-1 downto 0);
  
  
  -- stage 1 output and stage 2 input registers
  -- (all written by process stage_1)
  signal S2Enable     : std_ulogic;   -- S1Enable delayed by one clock
  signal S12SuperMode : std_ulogic_vector(SUPER_MODE_VECTOR_SIZE-1 downto 0);
  signal S12Modes     : std_ulogic_vector(2*BLOCK64_NBR*MODE_VECTOR_SIZE-1
                                          downto 0);
  signal S12DE        : std_ulogic_vector (2*BLOCK64_NBR*(SGL_E_SIZE+1)-1
                                           downto 0);   -- exponent difference
   -- one field of E_SIZE+1 bits per operand pair, packed from bit 0 upwards:
   -- S12DE(SIZE-1 downto 0) : DE = |Ea - Eb| (magnitude)
   -- S12DE(SIZE)          : sign ('1' when Ea < Eb)
                   -- TODO kind of MAX(2*SGL_E_SIZE, DBL_E_SIZE)
  signal S12Repr      : std_ulogic_vector(BLOCK64_NBR*REPRES_VECTOR_SIZE*2*2-1
                                          downto 0);
                   --  Representation of number
                   --  BLOCK64_NBR : can have BLOCK64_NBR 64-bit block
                   --   * 6        : each value is coded over 6 bits
                   --   * 2        : first operand, second operand
                   --   * 2        : in 64-bit SIMD mode: only 1 is used
                   --                in 32-bit SIMD mode: 1 for the first pair,
                   --                                     1 for the second pair
                   -- 000001 : normal representation
                   -- 000010 : denormalised representation
                   -- 000100 : +inf
                   -- 100100 : -inf
                   -- 001000 : +zero
                   -- 101000 : -zero
                   -- 010000 : SNaN  (top mantissa bit = 0)
                   -- 110000 : QNaN  (top mantissa bit = 1)
  -- NOTE(review): BLOCK64_NBR*WIDTH is larger than WIDTH when WIDTH > 64;
  -- stage_1 only writes indices below WIDTH.
  signal S12Fa        : std_ulogic_vector (BLOCK64_NBR*WIDTH-1 downto 0);
  signal S12Fb        : std_ulogic_vector (BLOCK64_NBR*WIDTH-1 downto 0);
  signal S12Ma        : std_ulogic_vector (BLOCK64_NBR*(DBL_M_SIZE+2)-1
                                           downto 0);
      -- stage_1 packs one field of M_SIZE+1 bits per operand:
      -- Ma(SIZE-1 downto 0) : orignal mantissa
      -- Ma(SIZE)            : added MSB (written as '0' by stage_1)
      -- Ma(SIZE+1)          : reserved for mantissa overflow
      -- Ma(SIZE+2)          : mantissa sign
      -- NOTE(review): stage_1 does not write the SIZE+1 / SIZE+2 bits, and
      -- the vector has only DBL_M_SIZE+2 bits per 64-bit block.
  signal S12Mb        : std_ulogic_vector (BLOCK64_NBR*(DBL_M_SIZE+1)-1 
                                           downto 0);
      -- same packing as S12Ma:
      -- Mb(SIZE-1 downto 0) : orignal mantissa
      -- Mb(SIZE)            : added MSB (written as '0' by stage_1)
  
  -- stage 2 output and stage 3 input registers
  -- NOTE(review): none of the S23* / S34* / S45* signals is driven in the
  -- part of the file covered by this review. The remarks below rest on the
  -- signal names and on the matching S12* registers only; confirm them
  -- against the stage 2..5 processes.
  signal S3Enable     : std_logic;
  signal S23SuperMode : std_ulogic_vector (SUPER_MODE_VECTOR_SIZE-1 downto 0);
  signal S23Modes     : std_ulogic_vector (2*BLOCK64_NBR*MODE_VECTOR_SIZE-1 
                                           downto 0);
  signal S23Fa        : std_ulogic_vector (BLOCK64_NBR*WIDTH-1 downto 0);
  signal S23Fb        : std_ulogic_vector (BLOCK64_NBR*WIDTH-1 downto 0);
  -- the literal 6 below equals REPRES_VECTOR_SIZE (same size as S12Repr)
  signal S23Repr      : std_ulogic_vector (BLOCK64_NBR*6*2*2-1 downto 0);
  signal S23Ma        : std_ulogic_vector (BLOCK64_NBR*(DBL_M_SIZE+1)-1 
                                           downto 0);
  signal S23Mb        : std_ulogic_vector (BLOCK64_NBR*(DBL_M_SIZE+1)-1
                                           downto 0);
  -- presumably the selected result exponent -- TODO confirm
  signal S23E         : std_ulogic_vector (BLOCK64_NBR*SGL_E_SIZE*2-1
                                           downto 0);
  -- presumably one sign bit per operand pair -- TODO confirm
  signal S23S         : std_ulogic_vector (BLOCK64_NBR*2-1 downto 0);

  -- stage 3 output and stage 4 input registers
  signal S4Enable     : std_logic;
  signal S34SuperMode : std_ulogic_vector (SUPER_MODE_VECTOR_SIZE-1 downto 0);
  signal S34Modes     : std_ulogic_vector (2*BLOCK64_NBR*MODE_VECTOR_SIZE-1 
                                           downto 0);
  signal S34Repr      : std_ulogic_vector (BLOCK64_NBR*6*2*2-1 downto 0);
  signal S34Fa        : std_ulogic_vector (BLOCK64_NBR*WIDTH-1 downto 0);
  signal S34Fb        : std_ulogic_vector (BLOCK64_NBR*WIDTH-1 downto 0);

--  signal S34Ma        : std_ulogic_vector (BLOCK64_NBR*(DBL_M_SIZE+1)-1 
--                                           downto 0);
--  signal S34Mb        : std_ulogic_vector (BLOCK64_NBR*(DBL_M_SIZE+1)-1
--                                           downto 0);
  signal S34E         : std_ulogic_vector (BLOCK64_NBR*SGL_E_SIZE*2-1
                                           downto 0);
  -- four 16-bit groups per 64-bit block; presumably partial sums (S34M),
  -- incremented partial sums (S34Mi) and per-group generate / propagate
  -- flags (S34G / S34P) of a carry-select mantissa adder -- TODO confirm
  signal S34M         : std_ulogic_vector (BLOCK64_NBR*(4*16)-1 downto 0);
  signal S34Mi        : std_ulogic_vector (BLOCK64_NBR*(4*16)-1 downto 0);
  signal S34G         : std_ulogic_vector (BLOCK64_NBR*(4)-1 downto 0);
  signal S34P         : std_ulogic_vector (BLOCK64_NBR*(4)-1 downto 0);
  signal S34S         : std_ulogic_vector (BLOCK64_NBR*2-1 downto 0);

  -- stage 4 output and stage 5 input registers
  signal S5Enable     : std_logic;
  signal S45SuperMode : std_ulogic_vector (SUPER_MODE_VECTOR_SIZE-1 downto 0);
  signal S45Modes     : std_ulogic_vector (2*BLOCK64_NBR*MODE_VECTOR_SIZE-1 
                                           downto 0);
  signal S45M         : std_ulogic_vector (BLOCK64_NBR*(DBL_M_SIZE+1)-1 downto 0);     
  signal S45Repr      : std_ulogic_vector (BLOCK64_NBR*6*2*2-1 downto 0);
  signal S45E         : std_ulogic_vector (BLOCK64_NBR*SGL_E_SIZE*2-1 downto 0);
  -- presumably one carry bit per operand pair -- TODO confirm
  signal S45C         : std_ulogic_vector (2*BLOCK64_NBR-1 downto 0);     

--
  -- stage 3 signals
--  signal S3DE : std_ulogic_vector (ESIZE downto 0);
--  signal S3Ma : std_ulogic_vector (MSIZE downto 0);
--  signal S3Mb : std_ulogic_vector (MSIZE downto 0);
--  -- stage 3 outputs and stage 4 inputs
--  signal S34Fout  : std_ulogic_vector (FLSIZE-1 downto 0);  
--  signal S4Mode : std_ulogic_vector(14 downto 0);
--
--  signal S4E : std_ulogic_vector(ESIZE-1 downto 0);
--  signal S4Ma : std_ulogic_vector(FLSIZE-1 downto 0);
--  signal S4Mb : std_ulogic_vector(FLSIZE-1 downto 0);
--  signal S4MaF: F_VECTOR;
--  signal S4MbF: F_VECTOR;
--  signal S4Sa : std_ulogic;
--  signal S4Sb : std_ulogic;
--  signal S4AddSub : std_ulogic;
--  --stage 4 signals  
--  signal S4ManAddEn  : std_ulogic := '0';
--  signal S4ManAddOut : F_VECTOR;
--  -- stage 3 outputs
--  signal S4Fout  : std_ulogic_vector (FLSIZE-1 downto 0);  
--  signal S4FExout: std_ulogic_vector(8 downto 0);
--  signal S4out   : std_ulogic;
--
--
--
--  -- stage 5 inputs
--  signal S5Mode : std_ulogic_vector(14 downto 0);
--  signal S5E : std_ulogic_vector(ESIZE-1 downto 0);
--  -- stage 5 signals
--  signal S5M : std_ulogic_vector (MSIZE+2 downto 0);
--  -- note : why MSIZE + 2?????
--  -- S5M(MSIZE+2)    : to have sign
--  -- S5M(MSIZE+1)    : in case of Mantissa Adder overflow
--  -- S5M(MSIZE)      : MSB
--  -- S5M(MSIZE-1..0) : 23 bit mantissa (for single)
--  -- stage 6 outputs
--  signal S5Fout : std_ulogic_vector (FLSIZE-1 downto 0);  
--  signal S5FExout: std_ulogic_vector(8 downto 0);
--
--




  
-- INTERNAL FUNCTIONS

  -- Right shift F by the unsigned amount coded in N, inserting G at the top.
  -- != from rshift from bit_manipulation.vhdl where N is a natural
  -- here N is unknown (it's a bit vector)
  --
  --   F : vector to shift (any index range; the result is L-1 downto 0)
  --   N : shift amount, N(k) = '1' adds a shift by 2**k positions
  --   G : bit value inserted at the MSB side (default '0')
  --
  -- Implemented as a logarithmic (barrel) shifter, one conditional stage
  -- per bit of N. Each stage builds its result from the *previous* stage
  -- value before anything is overwritten; filling the top bits first and
  -- copying afterwards (as was done before) destroys the bits that are
  -- about to be moved down.
  -- A stage whose shift is >= F'length yields a vector made only of G.
  -- As before, N(5) (shift by 32) is only honoured for vectors of at least
  -- DBL_M_SIZE bits; shorter vectors ignore it.
  function fasu_rshift(F : std_ulogic_vector; N: std_ulogic_vector(5 downto 0);
                       G : std_ulogic := '0') return std_ulogic_vector is
    constant L : natural := F'length;
    variable yy : std_ulogic_vector(L-1 downto 0);  -- current stage value
    variable tt : std_ulogic_vector(L-1 downto 0);  -- next stage value
    variable amount : natural;                      -- shift of current stage
  begin
    -- copy into a vector with a known range, whatever the bounds of F are
    yy := F;

    for k in 0 to 5 loop
      amount := 2**k;
      if (N(k) = '1') and ((k < 5) or (L >= DBL_M_SIZE)) then
        if (amount >= L) then
          -- everything is shifted out
          yy := (others => G);
        else
          tt := (others => G);
          tt(L-1-amount downto 0) := yy(L-1 downto amount);
          yy := tt;
        end if;
      end if;
    end loop;

    return yy;
  end function;
  -- hope this is not too deep... 
  -- TODO: delay estimation
  
  
  -- integer incrementer
  -- Returns F xor lshift(cascade_and(F), 1, '1').
  -- cascade_and and lshift are not declared in this file (presumably they
  -- come from work.Bit_Manipulation). Assuming cascade_and gives, for each
  -- bit, the AND of that bit and all less significant ones, and that
  -- lshift(X, 1, '1') moves X one position towards the MSB while inserting
  -- '1' at the LSB, the result is F + 1 modulo 2**F'length -- TODO confirm
  -- against that package.
  function fasu_incr(F: std_ulogic_vector) return std_ulogic_vector is
  begin
    return F xor lshift(cascade_and(F), 1, '1');
  end function;

  -- integer decrementer
  -- Computes not(fasu_incr(not F)); provided fasu_incr adds one, this is
  -- F - 1 modulo 2**F'length (because not(X) = -X - 1 in two's complement).
  function fasu_decr(F: std_ulogic_vector) return std_ulogic_vector is
  begin
    return not fasu_incr(not F);
  end function;

  -- integer negation
  -- Computes fasu_incr(not F); provided fasu_incr adds one, this is the
  -- two's complement negation of F (invert all bits, then add one).
  function fasu_neg(F: std_ulogic_vector) return std_ulogic_vector is
  begin
    return fasu_incr(not F);
  end function;
  
  -- exponent subtraction (operand width is taken from A)
  -- A and B are unsigned integers of the same length
  -- Y receives the magnitude |A - B| as an unsigned integer
  -- C receives the sign of A - B ('1' when the difference is negative)
  procedure fasu_sub(A, B : in std_ulogic_vector;  Y : out std_ulogic_vector;
                     C : out std_ulogic) is
    constant W : natural := A'length;
    -- operands extended by one leading '0' so that bit W of the sum acts
    -- as the sign of the difference
    variable op_a, op_b : std_ulogic_vector(W downto 0);
    -- the two sums delivered by CSAdd; per the notes in this file the
    -- first is op_a + not(op_b) and the second is the same plus one
    variable sum0, sum1 : std_ulogic_vector(W downto 0);
    -- generate / propagate outputs of CSAdd, not used here
    variable gen, prop  : std_ulogic;
  begin
    -- A - B = A + not(B) + 1 : the "+ 1" comes for free from the second
    -- output of the carry select adder
    op_a := '0' & A;
    op_b := '0' & B;
    CSAdd(op_a, not op_b, sum0, sum1, gen, prop);

    -- the top bit of the extended difference is its sign
    C := sum1(W);
    if (sum1(W) = '1') then
      -- negative difference: return its magnitude
      Y := fasu_neg(sum1(W-1 downto 0));
    else
      Y := sum1(W-1 downto 0);
    end if;
  end procedure;
  -- note: does it fit into 1 cycle???

  -- Thin wrapper that forwards all of its arguments, unchanged and in the
  -- same order, to CSAdd (not declared in this file; presumably the carry
  -- select adder of work.generic_adder).
  -- Judging from the use of CSAdd in fasu_sub, Y is presumably A + B and
  -- Z is A + B + 1, with G / P the generate and propagate outputs --
  -- TODO confirm against that package.
  procedure fasu_add(A, B : in std_ulogic_vector; 
                     Y, Z : out std_ulogic_vector; 
                     G, P : out std_ulogic) is

  begin
    CSAdd(A, B, Y, Z, G, P);
  end procedure;

  -- return true when F, read as an unsigned number, equals 0
  -- (F is converted to std_logic_vector so that the integer comparison of
  --  std_logic_unsigned can be used)
  function fasu_isnull(F: std_ulogic_vector) return boolean is
    constant bits : std_logic_vector(1 to F'length) := To_StdLogicVector(F);
  begin
    return bits = 0;
  end;

begin


-- USED COMPONENTS INSTANTIATIONS:





-- FLOATING POINT ADD/SUB UNIT STAGES : 


  -- operands in, result out (the ports are WIDTH-1 downto 0, so the whole
  -- port is taken)
  S1Fa   <= Din_0;
  S1Fb   <= Din_1;
  Dout_0 <= Fout;

  -- a new operation enters stage 1 whenever En is set
  S1Enable <= En;

  -- instruction-wide settings, shared by all packed operands
  S1SuperMode <= (Super_Mode_IEEE  => ieee_flag,
                  Super_Mode_Sub   => Substract,
                  Super_Mode_SIMD0 => SIMD(0),
                  Super_Mode_SIMD1 => SIMD(1));

  -- every per-pair Mode vector enters the pipeline cleared: no exception,
  -- no special output, not done (Mode_EX0 .. Mode_DONE are the
  -- MODE_VECTOR_SIZE bits of one vector)
  mode_inst: for i in 0 to 2*BLOCK64_NBR-1 generate
    S1Modes((i+1)*MODE_VECTOR_SIZE-1 downto i*MODE_VECTOR_SIZE)
      <= (MODE_VECTOR_SIZE-1 downto 0 => '0');
  end generate;
     
  -- stage 1 : start DE calculation and // trivial case checks
  --
  -- On every rising Clk edge with S1Enable = '1', each packed operand pair
  -- (two per 64-bit block in 32-bit SIMD mode, one in 64-bit SIMD mode) is
  --   * split into its mantissa and exponent fields; one extra mantissa bit
  --     (the slot of the hidden MSB) is cleared,
  --   * passed to fasu_sub, which gives the magnitude and the sign of
  --     Ea - Eb,
  --   * classified as zero / denormalised / infinity / NaN / normalised.
  -- The results are stored in the S12* registers. S2Enable follows S1Enable
  -- with one clock of delay; Rst = '1' asynchronously clears S2Enable only.
  -- An unsupported SIMD code is reported with severity failure.
  stage_1 : process (Clk, Rst)

    -- current operand pair (only the SIZE low bits are used in 32-bit mode)
    variable Fa      : std_ulogic_vector(DBL_SIZE-1 downto 0);
    variable Fb      : std_ulogic_vector(DBL_SIZE-1 downto 0);
    -- mode vector of the current operand pair
    variable Modes   : std_ulogic_vector(MODE_VECTOR_SIZE-1 downto 0);
    -- representation (class) of operands A and B
    variable ReprA   : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    variable ReprB   : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    -- DE = Ea - Eb : magnitude in DE(E_SIZE-1 downto 0), sign in DE(E_SIZE)
    variable DE      : std_ulogic_vector(DBL_E_SIZE+1-1 downto 0);
    -- mantissas, with one extra bit for the hidden MSB
    variable Ma, Mb  : std_ulogic_vector(DBL_M_SIZE+1-1 downto 0);

    -- geometry of the selected float format
    variable SIZE    : natural;
    variable E_SIZE  : natural;
    variable E_Start : natural;
    variable E_End   : natural;
    variable M_SIZE  : natural;
    variable M_Start : natural;
    variable M_End   : natural;
    variable S_Pos   : natural;
    -- exponents (sized for the largest supported exponent)
    variable Ea,Eb   : std_ulogic_vector(2*SGL_E_SIZE-1 downto 0);
    -- selected float format (not read in this stage)
    variable FT      : t_float;
    -- number of floats per 64-bit block
    variable L       : natural;

    -- Classify one operand from its exponent field, its mantissa field and
    -- its sign bit; returns the Repres vector with exactly one class bit
    -- set. REPRES_MSB carries the sign for zero and infinity, the top
    -- mantissa bit for NaN, and stays '0' otherwise.
    function classify(Exp, Man : std_ulogic_vector; Sgn : std_ulogic)
      return std_ulogic_vector is
      variable R : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    begin
      R := (others => '0');
      if reduce_or(Exp) = '0' then            -- if Exp = 0..0
        if reduce_or(Man) = '0' then          --   if M = 0..0
          -- zero number
          R(REPRES_ZERO) := '1';
          R(REPRES_MSB)  := Sgn;
        else
          -- denormalised
          R(REPRES_DENORMALISED) := '1';
        end if;
      elsif reduce_and(Exp) = '1' then        -- if Exp = 1..1
        if reduce_or(Man) = '0' then          --   if M = 0..0
          -- +/- inf
          R(REPRES_INFTY) := '1';
          R(REPRES_MSB)   := Sgn;
        else
          -- NaN: the top mantissa bit tells quiet from signaling
          R(REPRES_NAN) := '1';
          R(REPRES_MSB) := Man(Man'left);
        end if;
      else
        -- normal
        R(REPRES_NORMALISED) := '1';
      end if;
      return R;
    end function;

  begin
    if (Rst = '1') then
      S2Enable <= '0';
    elsif (rising_edge(Clk)) then
      if (S1Enable = '1') then
        if (S1SuperMode(Super_Mode_SIMD1 downto Super_Mode_SIMD0)
            = Mode_SIMD_Single) then -- 32 bit SIMD Mode

          FT := t_single;
          L := 2;
          SIZE    := SGL_SIZE;
          E_SIZE  := SGL_E_SIZE;
          E_Start := SGL_E_Start;
          E_End   := SGL_E_End;
          M_SIZE  := SGL_M_SIZE;
          M_Start := SGL_M_Start;
          M_End   := SGL_M_End;
          S_Pos   := SGL_S_Pos;

        elsif (S1SuperMode(Super_Mode_SIMD1 downto Super_Mode_SIMD0)
               = Mode_SIMD_Double) then -- 64 bit SIMD Mode

          FT := t_double;  -- was t_single (copy/paste from the branch above)
          L := 1;
          SIZE    := DBL_SIZE;
          E_SIZE  := DBL_E_SIZE;
          E_Start := DBL_E_Start;
          E_End   := DBL_E_End;
          M_SIZE  := DBL_M_SIZE;
          M_Start := DBL_M_Start;
          M_End   := DBL_M_End;
          S_Pos   := DBL_S_Pos;
        else
          report "Unknown SIMD format" severity failure;
        end if;

        -- the super mode is the same for all operand pairs: forward it once
        S12SuperMode <= S1SuperMode;

        for i in 0 to L*BLOCK64_NBR-1 loop -- for each operand pair

          -- inputs
          Fa(SIZE-1 downto 0) := S1Fa((i+1)*SIZE-1 downto i*SIZE);
          Fb(SIZE-1 downto 0) := S1Fb((i+1)*SIZE-1 downto i*SIZE);
          Modes               := S1Modes((i+1)*MODE_VECTOR_SIZE-1
                                         downto i*MODE_VECTOR_SIZE);

          -- mantissas; bit M_SIZE is reserved for the mantissa MSB
          -- (not represented in normal float) and cleared here
          Ma(M_SIZE) := '0';
          Ma(M_SIZE-1 downto 0) := Fa(M_End downto M_Start);
          Mb(M_SIZE) := '0';
          Mb(M_SIZE-1 downto 0) := Fb(M_End downto M_Start);

          -- exponents and their difference
          Ea(E_SIZE-1 downto 0) := Fa(E_End downto E_Start);
          Eb(E_SIZE-1 downto 0) := Fb(E_End downto E_Start);

          fasu_sub(Ea(E_SIZE-1 downto 0), Eb(E_SIZE-1 downto 0),
                   DE(E_SIZE-1 downto 0), DE(E_SIZE));

          -- in parallel try to determine the type of operand we have...
          ReprA := classify(Fa(E_End downto E_Start),
                            Fa(M_End downto M_Start), Fa(S_Pos));
          ReprB := classify(Fb(E_End downto E_Start),
                            Fb(M_End downto M_Start), Fb(S_Pos));

          -- outputs
          S12Modes((i+1)*MODE_VECTOR_SIZE-1 downto i*MODE_VECTOR_SIZE) <=
            Modes;
          S12DE((i+1)*(E_SIZE+1)-1 downto i*(E_SIZE+1)) <=
            DE(E_SIZE+1-1 downto 0);
          -- per pair: ReprA in the low REPRES_VECTOR_SIZE bits, ReprB above
          S12Repr(i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE-1
                  downto i*REPRES_VECTOR_SIZE*2) <= ReprA;
          S12Repr(i*REPRES_VECTOR_SIZE*2+
                  REPRES_VECTOR_SIZE+REPRES_VECTOR_SIZE-1
                  downto i*REPRES_VECTOR_SIZE*2+
                  REPRES_VECTOR_SIZE) <= ReprB;
          S12Fa((i+1)*SIZE-1 downto i*SIZE) <= Fa(SIZE-1 downto 0);
          S12Fb((i+1)*SIZE-1 downto i*SIZE) <= Fb(SIZE-1 downto 0);
          -- the mantissas are packed with a stride of M_SIZE+1 bits; the
          -- bits of S12Ma / S12Mb above the last field are left unassigned
          S12Ma((i+1)*(M_SIZE+1)-1 downto i*(M_SIZE+1)) <=
            Ma(M_SIZE+1-1 downto 0);
          S12Mb((i+1)*(M_SIZE+1)-1 downto i*(M_SIZE+1)) <=
            Mb(M_SIZE+1-1 downto 0);

        end loop;
        -- enable stage 2
        S2Enable <= '1';
      else
        S2Enable <= '0';
      end if;
    end if;
  end process;














  -- Stage 2: mantissa alignment and special-operand resolution.
  --
  -- On every rising clock edge with S2Enable set, each float lane (two
  -- 32-bit lanes or one 64-bit lane per 64-bit block) is processed:
  --   * the stage-1 results are reloaded (operands, mode vector,
  --     representation flags, exponent difference DE, mantissas);
  --   * mantissa bit M_SIZE is set for operands flagged as normalised;
  --   * the mantissa of the operand with the smaller exponent is shifted
  --     right and the larger exponent is kept;
  --   * S = sign(A) xor sign(B) xor subtract is computed and Ma is inverted
  --     when S is set;
  --   * zero / infinity / NaN operand combinations are resolved by writing
  --     a special-output code and the DONE flag into the mode vector.
  -- The results are registered into the S23* signals and S3Enable is set.
  -- Rst clears S3Enable asynchronously.
  stage_2 : process (Clk, Rst)

    -- operands of the current lane (only bits SIZE-1 downto 0 are used)
    variable Fa      : std_ulogic_vector(DBL_SIZE-1 downto 0);
    variable Fb      : std_ulogic_vector(DBL_SIZE-1 downto 0);
    -- mode vector of the current lane
    variable Modes   : std_ulogic_vector(MODE_VECTOR_SIZE-1 downto 0);
    -- representation flags of A and B as delivered by stage 1
    variable ReprA   : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    variable ReprB   : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    -- class flags of A and B (without the REPRES_MSB bit), concatenated
    variable ReprAB  : std_ulogic_vector(2*(REPRES_VECTOR_SIZE-1)-1 downto 0);
    -- exponent difference; bit E_SIZE is its sign
    variable DE      : std_ulogic_vector(DBL_E_SIZE+1-1 downto 0);
    -- mantissas, M_SIZE+1 bits used (bit M_SIZE is the unpacked MSB)
    variable Ma      : std_ulogic_vector(DBL_M_SIZE+1-1 downto 0);
    variable Mb      : std_ulogic_vector(DBL_M_SIZE+1-1 downto 0);
    -- max exponent
    -- 2*size(single_exponent) > size(double_exponent)
    variable Exp     : std_ulogic_vector(2*SGL_E_SIZE-1 downto 0);
    -- '1' when the effective signs of the two operands differ
    variable S       : std_ulogic;
    -- float format of the current operation and its field layout
    variable FT      : t_float;
    variable L       : natural;   -- float lanes per 64-bit block
    variable SIZE    : natural;
    variable E_SIZE  : natural;
    variable E_Start : natural;
    variable E_End   : natural;
    variable M_SIZE  : natural;
    variable M_Start : natural;
    variable M_End   : natural;
    variable S_Pos   : natural;
  begin
    if (Rst = '1') then
      S3Enable <= '0';
    else
      if (rising_edge(Clk)) then
        if (S2Enable = '1') then

          -- select the field layout from the SIMD size bits
          if (S12SuperMode(Super_Mode_SIMD1 downto Super_Mode_SIMD0)
              = Mode_SIMD_Single) then -- 32 bit SIMD Mode

            FT      := t_single;
            L       := 2;
            SIZE    := SGL_SIZE;
            E_SIZE  := SGL_E_SIZE;
            E_Start := SGL_E_Start;
            E_End   := SGL_E_End;
            M_SIZE  := SGL_M_SIZE;
            M_Start := SGL_M_Start;
            M_End   := SGL_M_End;
            S_Pos   := SGL_S_Pos;

          elsif (S12SuperMode(Super_Mode_SIMD1 downto Super_Mode_SIMD0)
                 = Mode_SIMD_Double) then -- 64 bit SIMD Mode

            -- must be t_double, otherwise every "FT = t_single" test below
            -- takes the single-precision path for doubles as well
            FT      := t_double;
            L       := 1;
            SIZE    := DBL_SIZE;
            E_SIZE  := DBL_E_SIZE;
            E_Start := DBL_E_Start;
            E_End   := DBL_E_End;
            M_SIZE  := DBL_M_SIZE;
            M_Start := DBL_M_Start;
            M_End   := DBL_M_End;
            S_Pos   := DBL_S_Pos;
          else
            report "Unknown SIMD format" severity failure;
          end if;

          -- the super mode is common to all lanes
          S23SuperMode <= S12SuperMode;

          for i in 0 to L*BLOCK64_NBR-1 loop -- for each float lane

            -- inputs
            Fa(SIZE-1 downto 0) := S12Fa((i+1)*SIZE-1 downto i*SIZE);
            Fb(SIZE-1 downto 0) := S12Fb((i+1)*SIZE-1 downto i*SIZE);
            Modes := S12Modes((i+1)*MODE_VECTOR_SIZE-1
                              downto i*MODE_VECTOR_SIZE);
            ReprA := S12Repr(i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE-1
                             downto i*REPRES_VECTOR_SIZE*2);
            ReprB := S12Repr(i*REPRES_VECTOR_SIZE*2+
                             REPRES_VECTOR_SIZE+REPRES_VECTOR_SIZE-1
                             downto i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE);
            DE(E_SIZE+1-1 downto 0) := S12DE((i+1)*(E_SIZE+1)-1
                                             downto i*(E_SIZE+1));

            -- unpack the mantissas: bit M_SIZE (the bit above the packed
            -- mantissa) is forced to '1' for normalised operands
            Ma(M_SIZE+1-1 downto 0) := S12Ma((i+1)*(M_SIZE+1)-1
                                             downto i*(M_SIZE+1));
            if (ReprA(REPRES_NORMALISED) = '1') then
              Ma(M_SIZE) := '1';
            end if;

            Mb(M_SIZE+1-1 downto 0) := S12Mb((i+1)*(M_SIZE+1)-1
                                             downto i*(M_SIZE+1));
            if (ReprB(REPRES_NORMALISED) = '1') then
              Mb(M_SIZE) := '1';
            end if;

            -- Align the mantissas.  The sign of DE is its top loaded bit,
            -- DE(E_SIZE), for both formats.  Only the M_SIZE+1 loaded
            -- mantissa bits are shifted: index M_SIZE+1 is outside the
            -- declared range for doubles and was never loaded for singles.
            -- NOTE(review): the shift amount is taken straight from the low
            -- bits of DE; this assumes stage 1 delivers a usable magnitude
            -- there even when DE is negative -- confirm against stage 1.
            if (DE(E_SIZE) = '0') then -- DE >= 0 => Ea >= Eb
              -- Mb right shift
              if (FT = t_single) then
                Mb(M_SIZE downto 0) := fasu_rshift(Mb(M_SIZE downto 0),
                                                   DE(5 downto 0));
              else
                Mb(M_SIZE downto 0) := fasu_rshift(Mb(M_SIZE downto 0),
                                                   DE(6 downto 0));
              end if;
              Exp(E_SIZE-1 downto 0) := Fa(E_End downto E_Start);
            else
              -- Ea < Eb
              -- Ma right shift
              if (FT = t_single) then
                Ma(M_SIZE downto 0) := fasu_rshift(Ma(M_SIZE downto 0),
                                                   DE(5 downto 0));
              else
                Ma(M_SIZE downto 0) := fasu_rshift(Ma(M_SIZE downto 0),
                                                   DE(6 downto 0));
              end if;
              Exp(E_SIZE-1 downto 0) := Fb(E_End downto E_Start);
            end if;

            -- preparation of the next stage:
            -- a subtraction is computed as A - B = not(not(A) + B),
            -- so Ma is inverted whenever the effective signs differ.
            -- The sign bit is read at S_Pos of the current format.
            S := (Fa(S_Pos) xor Fb(S_Pos) xor
                  S12SuperMode(Super_Mode_Sub));
            if (S = '1') then
              Ma := not Ma;
            end if;

            -- in parallel finish the checks
            ReprAB := ReprA(REPRES_VECTOR_SIZE-1-1 downto 0) &
                      ReprB(REPRES_VECTOR_SIZE-1-1 downto 0);
            --TODO: replace these hard coded value by constants
            -- class codes: "00001" normalised, "00010" denormalised,
            --              "00100" infinite,   "01000" zero, "10000" NaN
            case ReprAB is

              -- A and B both finite and non-zero: regular addition
              when ("00001" & "00001") | ("00001" & "00010")
                 | ("00010" & "00001") | ("00010" & "00010") =>
                null;

              -- A finite or zero, B infinite: infinity with the sign of B,
              -- flipped for a subtraction
              when ("00001" & "00100") | ("00010" & "00100")
                 | ("01000" & "00100") =>
                Modes(Mode_SO2 downto Mode_SO1) := Mode_SO_INF(2 downto 1);
                if S12SuperMode(Super_Mode_Sub) = '0' then
                  Modes(Mode_SO0) := ReprB(REPRES_MSB);
                else
                  Modes(Mode_SO0) := not ReprB(REPRES_MSB);
                end if;
                Modes(Mode_DONE) := '1';

              -- A finite non-zero, B zero: the result is A
              when ("00001" & "01000") | ("00010" & "01000") =>
                Modes(Mode_SO2 downto Mode_SO0) := Mode_SO_Fa;
                Modes(Mode_DONE) := '1';

              -- A not NaN, B NaN: NaN, quiet/signalling flag taken from B
              -- NOTE(review): the second assignment overwrites Mode_SO1 that
              -- was just written, while the infinity branches use Mode_SO0
              -- for their extra bit -- confirm which bit is meant here.
              when ("00001" & "10000") | ("00010" & "10000")
                 | ("00100" & "10000") | ("01000" & "10000") =>
                Modes(Mode_SO2 downto Mode_SO1) := Mode_SO_QNaN(2 downto 1);
                Modes(Mode_SO1) := ReprB(REPRES_MSB); -- Q or S NaN
                Modes(Mode_DONE) := '1';

              -- A infinite, B finite or zero: infinity with the sign of A
              -- (ReprB carries no sign for these classes)
              when ("00100" & "00001") | ("00100" & "00010")
                 | ("00100" & "01000") =>
                Modes(Mode_SO2 downto Mode_SO1) := Mode_SO_INF(2 downto 1);
                Modes(Mode_SO0) := ReprA(REPRES_MSB);
                Modes(Mode_DONE) := '1';

              -- A infinite, B infinite: infinity when the effective signs
              -- agree, NaN otherwise
              when ("00100" & "00100") =>
                if S12SuperMode(Super_Mode_Sub) = '0' then -- addition
                  if (ReprA(REPRES_MSB) = ReprB(REPRES_MSB)) then -- same sign
                    Modes(Mode_SO2 downto Mode_SO1) := Mode_SO_INF(2 downto 1);
                    Modes(Mode_SO0) := ReprA(REPRES_MSB);
                  else -- NaN
                    Modes(Mode_SO2 downto Mode_SO0) := Mode_SO_QNaN;
                  end if;
                else -- subtraction
                  if (ReprA(REPRES_MSB) /= ReprB(REPRES_MSB)) then -- sign /=
                    Modes(Mode_SO2 downto Mode_SO1) := Mode_SO_INF(2 downto 1);
                    Modes(Mode_SO0) := ReprA(REPRES_MSB);
                  else -- NaN
                    Modes(Mode_SO2 downto Mode_SO0) := Mode_SO_QNaN;
                  end if;
                end if;
                Modes(Mode_DONE) := '1';

              -- A zero, B finite non-zero: the result is B, or -B for a
              -- subtraction; it is forwarded through Fa
              when ("01000" & "00001") | ("01000" & "00010") =>
                Fa(SIZE-1 downto 0) := Fb(SIZE-1 downto 0);
                if S12SuperMode(Super_Mode_Sub) = '1' then
                  Fa(S_Pos) := not Fa(S_Pos);
                end if;
                Modes(Mode_SO2 downto Mode_SO0) := Mode_SO_Fa;
                Modes(Mode_DONE) := '1';

              -- A zero, B zero
              when ("01000" & "01000") =>
                Modes(Mode_SO2 downto Mode_SO0) := Mode_SO_ZERO(2 downto 0);
                Modes(Mode_DONE) := '1';

              -- A NaN, B not NaN: NaN, quiet/signalling flag taken from A
              -- (ReprB holds no NaN information for these classes)
              -- NOTE(review): same Mode_SO1 / Mode_SO0 question as above.
              when ("10000" & "00001") | ("10000" & "00010")
                 | ("10000" & "00100") | ("10000" & "01000") =>
                Modes(Mode_SO2 downto Mode_SO1) := Mode_SO_QNaN(2 downto 1);
                Modes(Mode_SO1) := ReprA(REPRES_MSB); -- Q or S NaN
                Modes(Mode_DONE) := '1';

              -- A NaN, B NaN
              when ("10000" & "10000") =>
                Modes(Mode_SO2 downto Mode_SO1) := Mode_SO_QNaN(2 downto 1);
                Modes(Mode_SO1) := (ReprA(REPRES_MSB) and ReprB(REPRES_MSB));
                -- if both are QNaN then result is QNaN, otherwise it's SNaN
                Modes(Mode_DONE) := '1';

              when others =>
                report "beurk" severity failure;
                -- TODO: decide what to do for synthesis, because this is
                -- not synthesisable...

            end case;

            -- outputs
            S23Modes((i+1)*MODE_VECTOR_SIZE-1
                     downto i*MODE_VECTOR_SIZE) <= Modes;
            S23Fa((i+1)*SIZE-1 downto i*SIZE)
                  <= Fa(SIZE-1 downto 0);
            S23Fb((i+1)*SIZE-1 downto i*SIZE)
                  <= Fb(SIZE-1 downto 0);
            S23Repr(i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE-1
                    downto i*REPRES_VECTOR_SIZE*2) <= ReprA;
            S23Repr(i*REPRES_VECTOR_SIZE*2+
                    REPRES_VECTOR_SIZE+REPRES_VECTOR_SIZE-1
                    downto i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE)
                    <= ReprB;
            S23Ma((i+1)*(M_SIZE+1)-1 downto i*(M_SIZE+1))
                  <= Ma(M_SIZE+1-1 downto 0);
            S23Mb((i+1)*(M_SIZE+1)-1 downto i*(M_SIZE+1))
                  <= Mb(M_SIZE+1-1 downto 0);
            S23E ((i+1)*E_SIZE-1 downto i*E_SIZE)
                  <= Exp(E_SIZE-1 downto 0);
            S23S (i) <= S;

          end loop;

          -- enable stage 3
          S3Enable <= '1';
        else
          S3Enable <= '0';
        end if;
      end if;
    end if;
  end process;










  -- Stage 3: partial mantissa additions.
  --
  -- On every rising clock edge with S3Enable set, each float lane whose
  -- mode vector does not have DONE set gets its two aligned mantissas left
  -- aligned into 16-bit slices and handed to fasu_add, one call per slice.
  -- Per the original author's notes each call yields A+B, A+B+1 and two
  -- carry bits ("carry out" and "carry in may propagate", cf.
  -- generic_adder); these are combined in the next stage.
  -- A single-precision lane uses two slices, a double-precision lane four.
  -- Everything else (operands, modes, representation flags, exponent, sign)
  -- is passed through into the S34* signals and S4Enable is set.
  -- Rst clears S4Enable asynchronously.
  stage_3 : process (Clk, Rst)

    -- values of the current lane, passed through from stage 2
    variable Fa      : std_ulogic_vector(DBL_SIZE-1 downto 0);
    variable Fb      : std_ulogic_vector(DBL_SIZE-1 downto 0);
    variable Modes   : std_ulogic_vector(MODE_VECTOR_SIZE-1 downto 0);
    variable ReprA   : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    variable ReprB   : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    variable Ma      : std_ulogic_vector(DBL_M_SIZE+1-1 downto 0);
    variable Mb      : std_ulogic_vector(DBL_M_SIZE+1-1 downto 0);
    -- adder operands and results, four 16-bit slices per 64-bit block
    variable MMa     : std_ulogic_vector(16*4-1 downto 0);
    variable MMb     : std_ulogic_vector(16*4-1 downto 0);
    variable MMr     : std_ulogic_vector(16*4-1 downto 0);
    variable MMri    : std_ulogic_vector(16*4-1 downto 0);
    -- the two carry outputs of fasu_add, one bit per slice
    variable GG      : std_ulogic_vector(4-1 downto 0);
    variable PP      : std_ulogic_vector(4-1 downto 0);
    variable Exp     : std_ulogic_vector(2*SGL_E_SIZE-1 downto 0);
    variable S       : std_ulogic;
    -- index of the first 16-bit slice used by the current lane
    variable First   : natural;

    -- float format of the current operation and its field layout
    variable SIZE    : natural;
    variable E_SIZE  : natural;
    variable E_Start : natural;
    variable E_End   : natural;
    variable M_SIZE  : natural;
    variable M_Start : natural;
    variable M_End   : natural;
    variable S_Pos   : natural;
    variable FT      : t_float;
    variable L       : natural;   -- float lanes per 64-bit block
  begin

    if (Rst = '1') then
      S4Enable <= '0';
    else
      if (rising_edge(Clk)) then
        if (S3Enable = '1') then

          -- select the field layout from the SIMD size bits
          if (S23SuperMode(Super_Mode_SIMD1 downto Super_Mode_SIMD0)
              = Mode_SIMD_Single) then -- 32 bit SIMD Mode

            FT      := t_single;
            L       := 2;
            SIZE    := SGL_SIZE;
            E_SIZE  := SGL_E_SIZE;
            E_Start := SGL_E_Start;
            E_End   := SGL_E_End;
            M_SIZE  := SGL_M_SIZE;
            M_Start := SGL_M_Start;
            M_End   := SGL_M_End;
            S_Pos   := SGL_S_Pos;

          elsif (S23SuperMode(Super_Mode_SIMD1 downto Super_Mode_SIMD0)
                 = Mode_SIMD_Double) then -- 64 bit SIMD Mode

            -- must be t_double, otherwise the double-precision slicing and
            -- output paths below are never taken
            FT      := t_double;
            L       := 1;
            SIZE    := DBL_SIZE;
            E_SIZE  := DBL_E_SIZE;
            E_Start := DBL_E_Start;
            E_End   := DBL_E_End;
            M_SIZE  := DBL_M_SIZE;
            M_Start := DBL_M_Start;
            M_End   := DBL_M_End;
            S_Pos   := DBL_S_Pos;
          else
            report "Unknown SIMD format" severity failure;
          end if;

          -- the super mode is common to all lanes
          S34SuperMode <= S23SuperMode;

          for i in 0 to L*BLOCK64_NBR-1 loop -- for each 64 or 32 bit block

            -- inputs
            Fa(SIZE-1 downto 0) := S23Fa((i+1)*SIZE-1 downto i*SIZE);
            Fb(SIZE-1 downto 0) := S23Fb((i+1)*SIZE-1 downto i*SIZE);
            Modes := S23Modes((i+1)*MODE_VECTOR_SIZE-1
                              downto i*MODE_VECTOR_SIZE);
            ReprA := S23Repr(i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE-1
                             downto i*REPRES_VECTOR_SIZE*2);
            ReprB := S23Repr(i*REPRES_VECTOR_SIZE*2+
                             REPRES_VECTOR_SIZE+REPRES_VECTOR_SIZE-1
                             downto i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE);
            Ma(M_SIZE+1-1 downto 0) := S23Ma((i+1)*(M_SIZE+1)-1
                                             downto i*(M_SIZE+1));
            Mb(M_SIZE+1-1 downto 0) := S23Mb((i+1)*(M_SIZE+1)-1
                                             downto i*(M_SIZE+1));
            Exp(E_SIZE-1 downto 0) := S23E((i+1)*E_SIZE-1 downto i*E_SIZE);
            S := S23S (i);

            -- The adder vectors cover one 64-bit block.  A single lane
            -- owns slices 0..1 (first lane of the block) or 2..3 (second
            -- lane); a double lane owns slices 0..3.  Taking the lane
            -- number modulo L keeps the index inside the four slices for
            -- any BLOCK64_NBR.
            First := (i mod L) * 2;

            MMa := (others => '0');
            MMb := (others => '0');
            GG  := (others => '0');
            PP  := (others => '0');

            if Modes(Mode_DONE) = '0' then
              -- don't care about mantissa sign or anything else, just add
              if FT = t_single then
                -- M_SIZE+1 mantissa bits = 16 + 8, left aligned in the two
                -- slices of this lane; the low 8 bits stay zero
                MMa((First+2)*16-1 downto (First+1)*16)
                  := Ma (M_End+1    downto M_End+1-16+1);
                MMa((First+1)*16-1 downto First*16+8)
                  := Ma (M_End+1-16 downto M_End+1-16-8+1);
                MMb((First+2)*16-1 downto (First+1)*16)
                  := Mb (M_End+1    downto M_End+1-16+1);
                MMb((First+1)*16-1 downto First*16+8)
                  := Mb (M_End+1-16 downto M_End+1-16-8+1);
              elsif FT = t_double then
                -- M_SIZE+1 mantissa bits = 16 + 16 + 16 + 5, left aligned;
                -- the low 11 bits stay zero
                MMa(63 downto 48) := Ma (M_End+1      downto M_End+1-16+1);
                MMb(63 downto 48) := Mb (M_End+1      downto M_End+1-16+1);

                MMa(47 downto 32) := Ma (M_End+1-1*16 downto M_End+1-2*16+1);
                MMb(47 downto 32) := Mb (M_End+1-1*16 downto M_End+1-2*16+1);

                MMa(31 downto 16) := Ma (M_End+1-2*16 downto M_End+1-3*16+1);
                MMb(31 downto 16) := Mb (M_End+1-2*16 downto M_End+1-3*16+1);

                MMa(15 downto 11) := Ma (M_End+1-3*16 downto M_End+1-3*16-5+1);
                MMb(15 downto 11) := Mb (M_End+1-3*16 downto M_End+1-3*16-5+1);
              end if;

              -- one fasu_add call per 16-bit slice of this lane; the
              -- results and carry bits go to the next stage
              for j in First to First + 4/L-1 loop
                fasu_add(MMa ((j+1)*16-1 downto j*16),
                         MMb ((j+1)*16-1 downto j*16),
                         MMr ((j+1)*16-1 downto j*16),
                         MMri((j+1)*16-1 downto j*16),
                         GG(j), PP(j));
              end loop;
            end if;

            -- outputs
            S34Fa((i+1)*SIZE-1 downto i*SIZE)
                  <= Fa(SIZE-1 downto 0);
            S34Fb((i+1)*SIZE-1 downto i*SIZE)
                  <= Fb(SIZE-1 downto 0);
            S34Modes((i+1)*MODE_VECTOR_SIZE-1
                     downto i*MODE_VECTOR_SIZE) <= Modes;
            S34Repr(i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE-1
                    downto i*REPRES_VECTOR_SIZE*2) <= ReprA;
            S34Repr(i*REPRES_VECTOR_SIZE*2+
                    REPRES_VECTOR_SIZE+REPRES_VECTOR_SIZE-1
                    downto i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE)
                    <= ReprB;
            if (FT = t_single) then
              -- forward the two slices this lane actually computed
              -- (slices 2..3 for the second lane of a block, not 0..1)
              S34M ((i+1)*(2*16)-1 downto i*(2*16))
                    <= MMr ((First+2)*16-1 downto First*16);
              S34Mi((i+1)*(2*16)-1 downto i*(2*16))
                    <= MMri((First+2)*16-1 downto First*16);
              S34G ((i+1)*2-1 downto i*2) <= GG(First+1 downto First);
              S34P ((i+1)*2-1 downto i*2) <= PP(First+1 downto First);
            else -- FT = t_double
              S34M ((i+1)*(4*16)-1 downto i*(4*16)) <= MMr ((4*16)-1 downto 0);
              S34Mi((i+1)*(4*16)-1 downto i*(4*16)) <= MMri((4*16)-1 downto 0);
              S34G ((i+1)*4-1 downto i*4) <= GG(4-1 downto 0);
              S34P ((i+1)*4-1 downto i*4) <= PP(4-1 downto 0);
            end if;
            S34E ((i+1)*E_SIZE-1 downto i*E_SIZE)
                  <= Exp(E_SIZE-1 downto 0);
            S34S (i) <= S;
          end loop;

          -- enable stage 4
          S4Enable <= '1';
        else
          S4Enable <= '0';
        end if;
      end if;
    end if;
  end process;


-- Last 2 stages:
-- adjust the mantissa so that it is in its final form,
-- shift the mantissa (normalisation),
-- and decode the special outputs if any are set.

  stage_4 : process (Clk, Rst)

    
    variable Fa     : std_ulogic_vector(DBL_SIZE-1 downto 0);         
    variable Fb     : std_ulogic_vector(DBL_SIZE-1 downto 0);
    variable Modes  : std_ulogic_vector(MODE_VECTOR_SIZE-1 downto 0);
    variable ReprA  : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    variable ReprB  : std_ulogic_vector(REPRES_VECTOR_SIZE-1 downto 0);
    variable NP     : natural;                                   
    variable Ma     : std_ulogic_vector(DBL_M_SIZE+1-1 downto 0);   
    variable Mb     : std_ulogic_vector(DBL_M_SIZE+1-1 downto 0);     
    variable MMr    : std_ulogic_vector(16*4-1 downto 0);   
    variable MMri   : std_ulogic_vector(16*4-1 downto 0);   
    variable GG     : std_ulogic_vector(4-1 downto 0);     
    variable PP     : std_ulogic_vector(4-1 downto 0);     
    variable SS, TT : std_ulogic_vector(4-1 downto 0);
    variable Gout, Pout : std_ulogic_vector(0 downto 0);
    variable Exp    : std_ulogic_vector(2*SGL_E_SIZE-1 downto 0);      
    variable Man    : std_ulogic_vector(64-1 downto 0);   
    variable S      : std_ulogic;

    variable SIZE    : natural;
    variable E_SIZE  : natural;
    variable E_Start : natural;
    variable E_End   : natural;
    variable M_SIZE  : natural;
    variable M_Start : natural;
    variable M_End   : natural;
    variable S_Pos   : natural;
    variable FT      : t_float;
    variable L       : natural;
  begin
  
    if (Rst = '1') then
      S5Enable <= '0';
    else
       if (rising_edge(Clk)) then
        if (S4Enable = '1') then
          
          if (S34SuperMode(Super_Mode_SIMD1 downto Super_Mode_SIMD0) 
              = Mode_SIMD_Single) then -- 32 bit SIMD Mode
              
            FT := t_single;
            L := 2;
            SIZE    := SGL_SIZE;
            E_SIZE  := SGL_E_SIZE;
            E_Start := SGL_E_Start;
            E_End   := SGL_E_End;
            M_SIZE  := SGL_M_SIZE;
            M_Start := SGL_M_Start;
            M_End   := SGL_M_End;
            S_Pos   := SGL_S_Pos;
            
          elsif (S34SuperMode(Super_Mode_SIMD1 downto Super_Mode_SIMD0) 
                 = Mode_SIMD_Double) then -- 64 bit SIMD Mode
                 
            FT := t_single;
            L := 1;
            SIZE    := DBL_SIZE;
            E_SIZE  := DBL_E_SIZE;
            E_Start := DBL_E_Start;
            E_End   := DBL_E_End;
            M_SIZE  := DBL_M_SIZE;
            M_Start := DBL_M_Start;
            M_End   := DBL_M_End;
            S_Pos   := DBL_S_Pos;
          else
            report "Unknown SIMD format" severity failure;
          end if;
          
          for i in 0 to BLOCK64_NBR-1 loop -- for each 64 bit block !!!
          -- removed modes tests cause it
          
            if S34Modes(2*i*MODE_VECTOR_SIZE + Mode_DONE) = '0' or 
              S34Modes(2*i*MODE_VECTOR_SIZE + MODE_VECTOR_SIZE + Mode_DONE) = '0' then
              MMr ((4*16)-1 downto 0) := S34M ((i+1)*(4*16)-1 downto i*(4*16));
              MMri((4*16)-1 downto 0) := S34Mi((i+1)*(4*16)-1 downto i*(4*16));
              GG(4-1 downto 0) := S34G ((i+1)*4-1 downto i*4);
              PP(4-1 downto 0) := S34P ((i+1)*4-1 downto i*4);
              -- carry select vector:
              CSV(GG, PP, SS, TT);
              -- carry look ahead
              CLA(GG, PP, Gout, Pout);
              for j in 0 to 4-1 loop
                if (FT = t_single) then
                  if (j < 2) then
                    S := S23S (2*i);
                  else
                    S := S23S (2*i+1);
                  end if;
                else
                  S := S23S (i);
                end if;
                if (S =  '0') then -- if result sign is known=> use vector SS
                  -- select correct output : use A+B or A+B+1 result?
                  if (SS(j) = '1') then
                    Man((j+1)*16-1 downto j*16) := MMri((j+1)*16-1 downto j*16);
                  else
                    Man((j+1)*16-1 downto j*16) := MMr ((j+1)*16-1 downto j*16);
                  end if;
                else -- if result sign is unknown => use vector TT
                  if (TT(j) = '1') then
                    Man((j+1)*16-1 downto j*16) := MMri((j+1)*16-1 downto j*16);
                  else
                    Man((j+1)*16-1 downto j*16) := MMr ((j+1)*16-1 downto j*16);
                  end if;
                end if;
              end loop;
              if (FT = t_single) then
                -- first single result:
                S45M ((2*i+1)*(SGL_M_SIZE+1)-1 downto 2*i*(SGL_M_SIZE+1))
                  <= Man(31 downto 8);
                S45C(2*i) <= Man(32);
                -- second single result
                S45M ((2*i+1)*(SGL_M_SIZE+1)+SGL_M_SIZE+1-1 downto 2*i*(SGL_M_SIZE+1)+SGL_M_SIZE+1)
                  <= Man(63 downto 40);
	              S45C(2*i) <= Gout(0); -- or Pout?
              else
                -- only 54 bit result:
                S45M (((i+1))*(DBL_M_SIZE+1)-1 downto i*(DBL_M_SIZE+1))
                  <= Man(63 downto 11);
	              S45C(i) <= Gout(0); -- or Pout?
              end if;
            end if;
          end loop;

          for i in 0 to L*BLOCK64_NBR-1 loop -- for each 64 or 32 bit block !!!
              -- inputs
              Fa(SIZE-1 downto 0)        := S34Fa((i+1)*SIZE-1 downto i*SIZE);
              Fb(SIZE-1 downto 0)        := S34Fb((i+1)*SIZE-1
                                                   downto i*SIZE);
              Modes                      := S34Modes((i+1)*MODE_VECTOR_SIZE-1 
                                                    downto i*MODE_VECTOR_SIZE);
              ReprA                      := S34Repr(i*REPRES_VECTOR_SIZE*2+
                                                     REPRES_VECTOR_SIZE-1   
                                                     downto 
                                                     i*REPRES_VECTOR_SIZE*2  );
              ReprB                      := S34Repr(i*REPRES_VECTOR_SIZE*2+
                                                     REPRES_VECTOR_SIZE+
                                                     REPRES_VECTOR_SIZE-1 
                                                     downto 
                                                     i*REPRES_VECTOR_SIZE*2
                                                     +REPRES_VECTOR_SIZE);
--              Ma(M_SIZE+1-1 downto 0)   := S34Ma((i+1)*(M_SIZE+1)-1
--                                                     downto i*(M_SIZE+1));
--              Mb(M_SIZE+1-1 downto 0)   := S34Mb((i+1)*(M_SIZE+1)-1
--                                                     downto i*(M_SIZE+1));
              Exp(E_SIZE-1 downto 0)    := S34E ((i+1)*E_SIZE-1 downto i*E_SIZE);

              -- nothing to do...
              
              -- outputs
              S45SuperMode <= S23SuperMode;
              S45Modes((i+1)*MODE_VECTOR_SIZE-1 
                       downto i*MODE_VECTOR_SIZE) <= Modes;
              S45Repr(i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE-1 
                      downto i*REPRES_VECTOR_SIZE*2  ) <= ReprA;
              S45Repr(i*REPRES_VECTOR_SIZE*2+
                      REPRES_VECTOR_SIZE+REPRES_VECTOR_SIZE-1
                      downto i*REPRES_VECTOR_SIZE*2+REPRES_VECTOR_SIZE)
                      <= ReprB;
              S45E ((i+1)*E_SIZE-1 downto i*E_SIZE)
                    <= Exp(E_SIZE-1 downto 0);
            end loop;
            
          -- enable stage 5
          S5Enable <= '1';
        else
          S5Enable <= '0';
        end if;
      end if;
    end if;
  end process;

-- last stage : normalization
-- (the stage_5 process below is entirely commented out)



--  -- note: S5M is given by the result of S4ManAdd (IAdd)
--  -- S5M = aligned mantissa 1 +/- aligned mantissa 2
--  -- stage5 : normalization and output
--  stage_5 : process(Clk, Rst)
--    variable S5MP : std_ulogic_vector(MSIZE+1 downto 0);
--    variable S5Mo : std_ulogic_vector(MSIZE-1 downto 0);
--    variable S5EP : std_ulogic_vector(ESIZE-1 downto 0);
--    variable S5Eo : std_ulogic_vector(ESIZE-1 downto 0);
--    variable S5So : std_ulogic;
--  begin
--    if (Rst = '1') then
--      --S7start <= '0';
--    else
--       if (rising_edge(Clk)) then
--        if (En = '1') then
--          if (S4out = '1') then -- stage 1 already has the result
--            S5Fout <= S4Fout;
--            S5FExout <= S4FExout;
--            --S5out <='1';
--          else
--            --S5out <='0';
--            --S5start <= '0';
--            S5FExout <= (others => '0');    
--            S5So := S5M(MSIZE + 2);
--            if (S5M(MSIZE + 2) = '1') then -- if result mantissa < 0
--              S5MP(MSIZE+1 downto 0) := SGL_neg(S5M(MSIZE+1 downto 0));
--              S5So := '1';  -- negative output
--            elsif (S5M(MSIZE + 2) = '0') then
--              S5MP(MSIZE+1 downto 0) := S5M(MSIZE+1 downto 0);
--              S5So := '0';            
--            end if;
--            if (S5MP(MSIZE + 1) = '1') then -- result mantissa has an overflow (Fa, Fb same sign for addition)
--                                            -- i.e., the MSB has moved
--                                            -- so: just right shift by 1
--                                            -- note: this loses precision (bit 0 is dropped)
--              S5Mo(MSIZE-1 downto 0) := S5MP(MSIZE downto 1);
--              -- note: expected: S5MP(MSIZE downto 0) <= S5M(MSIZE+1 downto 1);
--              -- but we remove in the same step the MSB which is now '1'
--              S5Eo := SGL_incr(S5E);
--            elsif (S5MP(MSIZE + 1) = '0') then 
--                 -- higher bits are null, so we have to left shift until
--                 -- the MSB (bit number MSIZE) becomes '1'
--                 -- occurs when Fa and Fb don't have the same sign and |Fa| < |Fb|
--              S5EP := S5E;
--              if (SGL_isnull(S5MP) = false) then
--                while (S5MP(MSIZE) /= '1') loop
--                break;
--                  S5EP := fl_decr(S5EP);
--                   S5MP(MSIZE downto 1) := S5MP(MSIZE-1 downto 0);
--                   S5MP(0) := '0';
--                end loop;
--              else -- null mantissa
--              end if;
--              S5Eo := S5E;
--              S5Mo := S5MP(MSIZE-1 downto 0); --removing MSB
--            end if;
--            S5Fout(FLSIZE-1) <= S5So;
--            S5Fout(FLSIZE-1-1 downto MSIZE) <= S5Eo;
--            S5Fout(MSIZE-1 downto 0) <= S5Mo;
--          end if;
--        end if;   
--      end if;    
--    end if;  
--  end process;
--  
--  Fout <= S5Fout;
--  FExout <= S5FExout;
--  
end Behav_1;































