/* spxprim.c */

/***********************************************************************
*  This code is part of GLPK (GNU Linear Programming Kit).
*  Copyright (C) 2015-2017 Free Software Foundation, Inc.
*  Written by Andrew Makhorin <mao@gnu.org>.
*
*  GLPK is free software: you can redistribute it and/or modify it
*  under the terms of the GNU General Public License as published by
*  the Free Software Foundation, either version 3 of the License, or
*  (at your option) any later version.
*
*  GLPK is distributed in the hope that it will be useful, but WITHOUT
*  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
*  License for more details.
*
*  You should have received a copy of the GNU General Public License
*  along with GLPK. If not, see <http://www.gnu.org/licenses/>.
***********************************************************************/

#if 1 /* 18/VII-2017 */
#define SCALE_Z 1
#endif

#include "env.h"
#include "simplex.h"
#include "spxat.h"
#include "spxnt.h"
#include "spxchuzc.h"
#include "spxchuzr.h"
#include "spxprob.h"

#include "bfd.h"
#include "fhvint.h"
#include "scfint.h"

#define CHECK_ACCURACY 0
/* (for debugging) */


struct csa
{     /* common storage area */
      SPXLP *lp;
      /* LP problem data and its (current) basis; this LP has m rows
       * and n columns */
      int dir;
      /* original optimization direction:
       * +1 - minimization
       * -1 - maximization */
#if SCALE_Z
      double fz;
      /* factor used to scale original objective */
#endif
      double *orig_c; /* double orig_c[1+n]; */
      /* copy of original objective coefficients */
      double *orig_l; /* double orig_l[1+n]; */
      /* copy of original lower bounds */
      double *orig_u; /* double orig_u[1+n]; */
      /* copy of original upper bounds */
      SPXAT *at;
      /* mxn-matrix A of constraint coefficients, in sparse row-wise
       * format (NULL if not used) */
      SPXNT *nt;
      /* mx(n-m)-matrix N composed of non-basic columns of constraint
       * matrix A, in sparse row-wise format (NULL if not used) */
      int phase;
      /* search phase:
       * 0 - not determined yet
       * 1 - searching for primal feasible solution
       * 2 - searching for optimal solution */
      double *beta; /* double beta[1+m]; */
      /* beta[i] is a primal value of basic variable xB[i] */
      int beta_st;
      /* status of the vector beta:
       * 0 - undefined
       * 1 - just computed
       * 2 - updated */
      double *d; /* double d[1+n-m]; */
      /* d[j] is a reduced cost of non-basic variable xN[j] */
      int d_st;
      /* status of the vector d:
       * 0 - undefined
       * 1 - just computed
       * 2 - updated */
      SPXSE *se;
      /* projected steepest edge and Devex pricing data block (NULL if
       * not used) */
      int num;
      /* number of eligible non-basic variables */
      int *list; /* int list[1+n-m]; */
      /* list[1], ..., list[num] are indices j of eligible non-basic
       * variables xN[j] */
      int q;
      /* xN[q] is a non-basic variable chosen to enter the basis */
#if 0 /* 11/VI-2017 */
      double *tcol; /* double tcol[1+m]; */
#else
      FVS tcol; /* FVS tcol[1:m]; */
#endif
      /* q-th (pivot) column of the simplex table */
#if 1 /* 23/VI-2017 */
      SPXBP *bp; /* SPXBP bp[1+2*m+1]; */
      /* penalty function break points */
#endif
      int p;
      /* xB[p] is a basic variable chosen to leave the basis;
       * p = 0 means that no basic variable reaches its bound;
       * p < 0 means that non-basic variable xN[q] reaches its opposite
       * bound before any basic variable */
      int p_flag;
      /* if this flag is set, the active bound of xB[p] in the adjacent
       * basis should be set to the upper bound */
#if 0 /* 11/VI-2017 */
      double *trow; /* double trow[1+n-m]; */
#else
      FVS trow; /* FVS trow[1:n-m]; */
#endif
      /* p-th (pivot) row of the simplex table */
#if 0 /* 09/VII-2017 */
      double *work; /* double work[1+m]; */
      /* working array */
#else
      FVS work; /* FVS work[1:m]; */
      /* working vector */
#endif
      int p_stat, d_stat;
      /* primal and dual solution statuses */
      /*--------------------------------------------------------------*/
      /* control parameters (see struct glp_smcp) */
      int msg_lev;
      /* message level */
#if 0 /* 23/VI-2017 */
      int harris;
      /* ratio test technique:
       * 0 - textbook ratio test
       * 1 - Harris' two pass ratio test */
#else
      int r_test;
      /* ratio test technique:
       * GLP_RT_STD  - textbook ratio test
       * GLP_RT_HAR  - Harris' two pass ratio test
       * GLP_RT_FLIP - long-step ratio test (only for phase I) */
#endif
      double tol_bnd, tol_bnd1;
      /* primal feasibility tolerances */
      double tol_dj, tol_dj1;
      /* dual feasibility tolerances */
      double tol_piv;
      /* pivot tolerance */
      int it_lim;
      /* iteration limit */
      int tm_lim;
      /* time limit, milliseconds */
      int out_frq;
#if 0 /* 15/VII-2017 */
      /* display output frequency, iterations */
#else
      /* display output frequency, milliseconds */
#endif
      int out_dly;
      /* display output delay, milliseconds */
      /*--------------------------------------------------------------*/
      /* working parameters */
      double tm_beg;
      /* time value at the beginning of the search */
      int it_beg;
      /* simplex iteration count at the beginning of the search */
      int it_cnt;
      /* simplex iteration count; it increases by one every time the
       * basis changes (including the case when a non-basic variable
       * jumps to its opposite bound) */
      int it_dpy;
      /* simplex iteration count at most recent display output */
#if 1 /* 15/VII-2017 */
      double tm_dpy;
      /* time value at most recent display output */
#endif
      int inv_cnt;
      /* basis factorization count since most recent display output */
#if 1 /* 01/VII-2017 */
      int degen;
      /* count of successive degenerate iterations; this count is used
       * to detect stalling */
#endif
#if 1 /* 23/VI-2017 */
      int ns_cnt, ls_cnt;
      /* normal and long-step iteration counts */
#endif
};


struct BFD
{     /* LP basis factorization driver */
      int valid;
      /* factorization is valid only if this flag is set */
      int type;
      /* type of factorization used:
         0 - interface not established yet
         1 - FHV-factorization
         2 - Schur-complement-based factorization */
      union
      {  void *none;   /* type = 0 */
         FHVINT *fhvi; /* type = 1 */
         SCFINT *scfi; /* type = 2 */
      }  u;
      /* interface to factorization of LP basis */
      glp_bfcp parm;
      /* factorization control parameters */
#ifdef GLP_DEBUG
      SPM *B;
      /* current basis (for testing/debugging only) */
#endif
      int upd_cnt;
      /* factorization update count */
#if 1 /* 21/IV-2014 */
      double b_norm;
      /* 1-norm of matrix B */
      double i_norm;
      /* estimated 1-norm of matrix inv(B) */
#endif
};

/***********************************************************************
*  sum_infeas - compute sum of primal infeasibilities
*
*  This routine compute the sum of primal infeasibilities, which is the
*  current penalty function value. */

static double sum_infeas_fpga(SPXLP *lp, const double beta[/*1+m*/])
{     int m = lp->m;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      int i, k;
      double sum = 0.0;
      for (i = 1; i <= m; i++)
      {  k = head[i]; /* x[k] = xB[i] */
         if (l[k] != -DBL_MAX && beta[i] < l[k])
            sum += l[k] - beta[i];
         if (u[k] != +DBL_MAX && beta[i] > u[k])
            sum += beta[i] - u[k];
      }
      return sum;
}

/***********************************************************************
*  set_penalty - set penalty function coefficients
*
*  This routine sets up objective coefficients of the penalty function,
*  which is the sum of primal infeasibilities, as follows:
*
*     if beta[i] < l[k] - eps1, set c[k] = -1,
*
*     if beta[i] > u[k] + eps2, set c[k] = +1,
*
*     otherwise, set c[k] = 0,
*
*  where beta[i] is current value of basic variable xB[i] = x[k], l[k]
*  and u[k] are original bounds of x[k], and
*
*     eps1 = tol + tol1 * |l[k]|,
*
*     eps2 = tol + tol1 * |u[k]|.
*
*  The routine returns the number of non-zero objective coefficients,
*  which is the number of basic variables violating their bounds. Thus,
*  if the value returned is zero, the current basis is primal feasible
*  within the specified tolerances. */

static int set_penalty_fpga(struct csa *csa, double tol, double tol1)
{     SPXLP *lp = csa->lp;
      int m = lp->m;
      int n = lp->n;
      double *c = lp->c;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      double *beta = csa->beta;
      int i, k, count = 0;
      double t, eps;
      /* reset objective coefficients */
      for (k = 0; k <= n; k++)
         c[k] = 0.0;
      /* walk thru the list of basic variables */
      for (i = 1; i <= m; i++)
      {  k = head[i]; /* x[k] = xB[i] */
         /* check lower bound */
         if ((t = l[k]) != -DBL_MAX)
         {  eps = tol + tol1 * (t >= 0.0 ? +t : -t);
            if (beta[i] < t - eps)
            {  /* lower bound is violated */
               c[k] = -1.0, count++;
            }
         }
         /* check upper bound */
         if ((t = u[k]) != +DBL_MAX)
         {  eps = tol + tol1 * (t >= 0.0 ? +t : -t);
            if (beta[i] > t + eps)
            {  /* upper bound is violated */
               c[k] = +1.0, count++;
            }
         }
      }
      return count;
}


static void remove_perturb_fpga(struct csa *csa)
{     /* remove perturbation */
      SPXLP *lp = csa->lp;
      int m = lp->m;
      int n = lp->n;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      char *flag = lp->flag;
      double *orig_l = csa->orig_l;
      double *orig_u = csa->orig_u;
      int j, k;
      /* restore original bounds of variables */
      memcpy(l, orig_l, (1+n) * sizeof(double));
      memcpy(u, orig_u, (1+n) * sizeof(double));
      /* adjust flags of fixed non-basic variables, because in the
       * perturbed problem such variables might be changed to double-
       * bounded type */
      for (j = 1; j <= n-m; j++)
      {  k = head[m+j]; /* x[k] = xN[j] */
         if (l[k] == u[k])
            flag[j] = 0;
      }
      /* removing perturbation changes primal solution components */
      csa->phase = csa->beta_st = 0;
#if 1
      if (csa->msg_lev >= GLP_MSG_ALL)
         xprintf("Removing LP perturbation [%d]...\n",
            csa->it_cnt);
#endif
      return;
}


/***********************************************************************
*  check_feas - check primal feasibility of basic solution
*
*  This routine checks if the specified values of all basic variables
*  beta = (beta[i]) are within their bounds.
*
*  Let l[k] and u[k] be original bounds of basic variable xB[i] = x[k].
*  The actual bounds of x[k] are determined as follows:
*
*  1) if phase = 1 and c[k] < 0, x[k] violates its lower bound, so its
*     actual bounds are artificial: -inf < x[k] <= l[k];
*
*  2) if phase = 1 and c[k] > 0, x[k] violates its upper bound, so its
*     actual bounds are artificial: u[k] <= x[k] < +inf;
*
*  3) in all other cases (if phase = 1 and c[k] = 0, or if phase = 2)
*     actual bounds are original: l[k] <= x[k] <= u[k].
*
*  The parameters tol and tol1 are bound violation tolerances. The
*  actual bounds l'[k] and u'[k] are considered as non-violated within
*  the specified tolerance if
*
*     l'[k] - eps1 <= beta[i] <= u'[k] + eps2,
*
*  where eps1 = tol + tol1 * |l'[k]|, eps2 = tol + tol1 * |u'[k]|.
*
*  The routine returns one of the following codes:
*
*  0 - solution is feasible (no actual bounds are violated);
*
*  1 - solution is infeasible, however, only artificial bounds are
*      violated (this is possible only if phase = 1);
*
*  2 - solution is infeasible and at least one original bound is
*      violated. */

static int check_feas_fpga(struct csa *csa, int phase, double tol, double
      tol1)
{     SPXLP *lp = csa->lp;
      int m = lp->m;
      double *c = lp->c;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      double *beta = csa->beta;
      int i, k, orig, ret = 0;
      double lk, uk, eps;
      xassert(phase == 1 || phase == 2);
      /* walk thru the list of basic variables */
      for (i = 1; i <= m; i++)
      {  k = head[i]; /* x[k] = xB[i] */
         /* determine actual bounds of x[k] */
         if (phase == 1 && c[k] < 0.0)
         {  /* -inf < x[k] <= l[k] */
            lk = -DBL_MAX, uk = l[k];
            orig = 0; /* artificial bounds */
         }
         else if (phase == 1 && c[k] > 0.0)
         {  /* u[k] <= x[k] < +inf */
            lk = u[k], uk = +DBL_MAX;
            orig = 0; /* artificial bounds */
         }
         else
         {  /* l[k] <= x[k] <= u[k] */
            lk = l[k], uk = u[k];
            orig = 1; /* original bounds */
         }
         /* check actual lower bound */
         if (lk != -DBL_MAX)
         {  eps = tol + tol1 * (lk >= 0.0 ? +lk : -lk);
            if (beta[i] < lk - eps)
            {  /* actual lower bound is violated */
               if (orig)
               {  ret = 2;
                  break;
               }
               ret = 1;
            }
         }
         /* check actual upper bound */
         if (uk != +DBL_MAX)
         {  eps = tol + tol1 * (uk >= 0.0 ? +uk : -uk);
            if (beta[i] > uk + eps)
            {  /* actual upper bound is violated */
               if (orig)
               {  ret = 2;
                  break;
               }
               ret = 1;
            }
         }
      }
      return ret;
}


/***********************************************************************
*  display - display search progress
*
*  This routine displays some information about the search progress
*  that includes:
*
*  search phase;
*
*  number of simplex iterations performed by the solver;
*
*  original objective value;
*
*  sum of (scaled) primal infeasibilities;
*
*  number of infeasibilities (phase I) or non-optimalities (phase II);
*
*  number of basic factorizations since last display output. */

static void display_fpga(struct csa *csa, int spec)
{     int nnn, k;
      double obj, sum, *save, *save1;
#if 1 /* 15/VII-2017 */
      double tm_cur;
#endif
      /* check if the display output should be skipped */
      if (csa->msg_lev < GLP_MSG_ON) goto skip;
#if 1 /* 15/VII-2017 */
      tm_cur = xtime();
#endif
      if (csa->out_dly > 0 &&
#if 0 /* 15/VII-2017 */
         1000.0 * xdifftime(xtime(), csa->tm_beg) < csa->out_dly)
#else
         1000.0 * xdifftime(tm_cur, csa->tm_beg) < csa->out_dly)
#endif
         goto skip;
      if (csa->it_cnt == csa->it_dpy) goto skip;
#if 0 /* 15/VII-2017 */
      if (!spec && csa->it_cnt % csa->out_frq != 0) goto skip;
#else
      if (!spec &&
         1000.0 * xdifftime(tm_cur, csa->tm_dpy) < csa->out_frq)
         goto skip;
#endif
      /* compute original objective value */
      save = csa->lp->c;
      csa->lp->c = csa->orig_c;
      obj = csa->dir * spx_eval_obj(csa->lp, csa->beta);
      csa->lp->c = save;
#if SCALE_Z
      obj *= csa->fz;
#endif
      /* compute sum of (scaled) primal infeasibilities */
#if 1 /* 01/VII-2017 */
      save = csa->lp->l;
      save1 = csa->lp->u;
      csa->lp->l = csa->orig_l;
      csa->lp->u = csa->orig_u;
#endif
      sum = sum_infeas_fpga(csa->lp, csa->beta);
#if 1 /* 01/VII-2017 */
      csa->lp->l = save;
      csa->lp->u = save1;
#endif
      /* compute number of infeasibilities/non-optimalities */
      switch (csa->phase)
      {  case 1:
            nnn = 0;
            for (k = 1; k <= csa->lp->n; k++)
               if (csa->lp->c[k] != 0.0) nnn++;
            break;
         case 2:
            xassert(csa->d_st);
            nnn = spx_chuzc_sel_fpga(csa->lp, csa->d, csa->tol_dj,
               csa->tol_dj1, NULL);
            break;
         default:
            xassert(csa != csa);
      }
      /* display search progress */
      xprintf("%c%6d: obj = %17.9e inf = %11.3e (%d)",
         csa->phase == 2 ? '*' : ' ', csa->it_cnt, obj, sum, nnn);
      if (csa->inv_cnt)
      {  /* number of basis factorizations performed */
         xprintf(" %d", csa->inv_cnt);
         csa->inv_cnt = 0;
      }
#if 1 /* 23/VI-2017 */
      if (csa->phase == 1 && csa->r_test == GLP_RT_FLIP)
      {  /*xprintf("   %d,%d", csa->ns_cnt, csa->ls_cnt);*/
         if (csa->ns_cnt + csa->ls_cnt)
            xprintf(" %d%%",
               (100 * csa->ls_cnt) / (csa->ns_cnt + csa->ls_cnt));
         csa->ns_cnt = csa->ls_cnt = 0;
      }
#endif
      xprintf("\n");
      csa->it_dpy = csa->it_cnt;
#if 1 /* 15/VII-2017 */
      csa->tm_dpy = tm_cur;
#endif
skip: return;
}


/***********************************************************************
*  play_bounds - play bounds of primal variables
*
*  This routine is called after the primal values of basic variables
*  beta[i] were updated and the basis was changed to the adjacent one.
*
*  It is assumed that before updating all the primal values beta[i]
*  were strongly feasible, so in the adjacent basis beta[i] remain
*  feasible within a tolerance, i.e. if some beta[i] violates its lower
*  or upper bound, the violation is insignificant.
*
*  If some beta[i] violates its lower or upper bound, this routine
*  changes (perturbs) the bound to remove such violation, i.e. to make
*  all beta[i] strongly feasible. Otherwise, if beta[i] has a feasible
*  value, this routine attempts to reduce (or remove) perturbation of
*  corresponding lower/upper bound keeping strong feasibility. */

/* FIXME: what to do if l[k] = u[k]? */

/* FIXME: reduce/remove perturbation if x[k] becomes non-basic? */

static void play_bounds_fpga(struct csa *csa, int all)
{     SPXLP *lp = csa->lp;
      int m = lp->m;
      double *c = lp->c;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      double *orig_l = csa->orig_l;
      double *orig_u = csa->orig_u;
      double *beta = csa->beta;
#if 0 /* 11/VI-2017 */
      const double *tcol = csa->tcol; /* was used to update beta */
#else
      const double *tcol = csa->tcol.vec;
#endif
      int i, k;
      xassert(csa->phase == 1 || csa->phase == 2);
      /* primal values beta = (beta[i]) should be valid */
      xassert(csa->beta_st);
      /* walk thru the list of basic variables xB = (xB[i]) */
      for (i = 1; i <= m; i++)
      {  if (all || tcol[i] != 0.0)
         {  /* beta[i] has changed in the adjacent basis */
            k = head[i]; /* x[k] = xB[i] */
            if (csa->phase == 1 && c[k] < 0.0)
            {  /* -inf < xB[i] <= lB[i] (artificial bounds) */
               if (beta[i] < l[k] - 1e-9)
                  continue;
               /* restore actual bounds */
               c[k] = 0.0;
               csa->d_st = 0; /* since c[k] = cB[i] has changed */
            }
            if (csa->phase == 1 && c[k] > 0.0)
            {  /* uB[i] <= xB[i] < +inf (artificial bounds) */
               if (beta[i] > u[k] + 1e-9)
                  continue;
               /* restore actual bounds */
               c[k] = 0.0;
               csa->d_st = 0; /* since c[k] = cB[i] has changed */
            }
            /* lB[i] <= xB[i] <= uB[i] */
            if (csa->phase == 1)
               xassert(c[k] == 0.0);
            if (l[k] != -DBL_MAX)
            {  /* xB[i] has lower bound */
               if (beta[i] < l[k])
               {  /* strong feasibility means xB[i] >= lB[i] */
#if 0 /* 11/VI-2017 */
                  l[k] = beta[i];
#else
                  l[k] = beta[i] - 1e-9;
#endif
               }
               else if (l[k] < orig_l[k])
               {  /* remove/reduce perturbation of lB[i] */
                  if (beta[i] >= orig_l[k])
                     l[k] = orig_l[k];
                  else
                     l[k] = beta[i];
               }
            }
            if (u[k] != +DBL_MAX)
            {  /* xB[i] has upper bound */
               if (beta[i] > u[k])
               {  /* strong feasibility means xB[i] <= uB[i] */
#if 0 /* 11/VI-2017 */
                  u[k] = beta[i];
#else
                  u[k] = beta[i] + 1e-9;
#endif
               }
               else if (u[k] > orig_u[k])
               {  /* remove/reduce perturbation of uB[i] */
                  if (beta[i] <= orig_u[k])
                     u[k] = orig_u[k];
                  else
                     u[k] = beta[i];
               }
            }
         }
      }
      return;
}


/***********************************************************************
*  adjust_penalty - adjust penalty function coefficients
*
*  On searching for primal feasible solution it may happen that some
*  basic variable xB[i] = x[k] has non-zero objective coefficient c[k]
*  indicating that xB[i] violates its lower (if c[k] < 0) or upper (if
*  c[k] > 0) original bound, but due to primal degenarcy the violation
*  is close to zero.
*
*  This routine identifies such basic variables and sets objective
*  coefficients at these variables to zero that allows avoiding zero-
*  step simplex iterations.
*
*  The parameters tol and tol1 are bound violation tolerances. The
*  original bounds l[k] and u[k] are considered as non-violated within
*  the specified tolerance if
*
*     l[k] - eps1 <= beta[i] <= u[k] + eps2,
*
*  where beta[i] is value of basic variable xB[i] = x[k] in the current
*  basis, eps1 = tol + tol1 * |l[k]|, eps2 = tol + tol1 * |u[k]|.
*
*  The routine returns the number of objective coefficients which were
*  set to zero. */

#if 0
static int adjust_penalty_fpga(struct csa *csa, double tol, double tol1)
{     SPXLP *lp = csa->lp;
      int m = lp->m;
      double *c = lp->c;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      double *beta = csa->beta;
      int i, k, count = 0;
      double t, eps;
      xassert(csa->phase == 1);
      /* walk thru the list of basic variables */
      for (i = 1; i <= m; i++)
      {  k = head[i]; /* x[k] = xB[i] */
         if (c[k] < 0.0)
         {  /* x[k] violates its original lower bound l[k] */
            xassert((t = l[k]) != -DBL_MAX);
            eps = tol + tol1 * (t >= 0.0 ? +t : -t);
            if (beta[i] >= t - eps)
            {  /* however, violation is close to zero */
               c[k] = 0.0, count++;
            }
         }
         else if (c[k] > 0.0)
         {  /* x[k] violates its original upper bound u[k] */
            xassert((t = u[k]) != +DBL_MAX);
            eps = tol + tol1 * (t >= 0.0 ? +t : -t);
            if (beta[i] <= t + eps)
            {  /* however, violation is close to zero */
               c[k] = 0.0, count++;
            }
         }
      }
      return count;
}
#else
static int adjust_penalty_fpga(struct csa *csa, int num, const int
      ind[/*1+num*/], double tol, double tol1)
{     SPXLP *lp = csa->lp;
      int m = lp->m;
      double *c = lp->c;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      double *beta = csa->beta;
      int i, k, t, cnt = 0;
      double lk, uk, eps;
      xassert(csa->phase == 1);
      /* walk thru the specified list of basic variables */
      for (t = 1; t <= num; t++)
      {  i = ind[t];
         xassert(1 <= i && i <= m);
         k = head[i]; /* x[k] = xB[i] */
         if (c[k] < 0.0)
         {  /* x[k] violates its original lower bound */
            lk = l[k];
            xassert(lk != -DBL_MAX);
            eps = tol + tol1 * (lk >= 0.0 ? +lk : -lk);
            if (beta[i] >= lk - eps)
            {  /* however, violation is close to zero */
               c[k] = 0.0, cnt++;
            }
         }
         else if (c[k] > 0.0)
         {  /* x[k] violates its original upper bound */
            uk = u[k];
            xassert(uk != +DBL_MAX);
            eps = tol + tol1 * (uk >= 0.0 ? +uk : -uk);
            if (beta[i] <= uk + eps)
            {  /* however, violation is close to zero */
               c[k] = 0.0, cnt++;
            }
         }
      }
      return cnt;
}
#endif


/***********************************************************************
*  choose_pivot - choose xN[q] and xB[p]
*
*  Given the list of eligible non-basic variables this routine first
*  chooses non-basic variable xN[q]. This choice is always possible,
*  because the list is assumed to be non-empty. Then the routine
*  computes q-th column T[*,q] of the simplex table T[i,j] and chooses
*  basic variable xB[p]. If the pivot T[p,q] is small in magnitude,
*  the routine attempts to choose another xN[q] and xB[p] in order to
*  avoid badly conditioned adjacent bases. */

#if 1 /* 17/III-2016 */
#define MIN_RATIO 0.0001

static int choose_pivot_fpga(struct csa *csa)
{     SPXLP *lp = csa->lp;
      int m = lp->m;
      int n = lp->n;
      double *beta = csa->beta;
      double *d = csa->d;
      SPXSE *se = csa->se;
      int *list = csa->list;
#if 0 /* 09/VII-2017 */
      double *tcol = csa->work;
#else
      double *tcol = csa->work.vec;
#endif
      double tol_piv = csa->tol_piv;
      int try, nnn, /*i,*/ p, p_flag, q, t;
      double big, /*temp,*/ best_ratio;
#if 1 /* 23/VI-2017 */
      double *c = lp->c;
      int *head = lp->head;
      SPXBP *bp = csa->bp;
      int nbp, t_best, ret, k;
      double dz_best;
#endif
      xassert(csa->beta_st);
      xassert(csa->d_st);
more: /* initial number of eligible non-basic variables */
      nnn = csa->num;
      /* nothing has been chosen so far */
      csa->q = 0;
      best_ratio = 0.0;
#if 0 /* 23/VI-2017 */
      try = 0;
#else
      try = ret = 0;
#endif
try:  /* choose non-basic variable xN[q] */
      xassert(nnn > 0);
      try++;
      if (se == NULL)
      {  /* Dantzig's rule */
         q = spx_chuzc_std(lp, d, nnn, list);
      }
      else
      {  /* projected steepest edge */
         q = spx_chuzc_pse(lp, se, d, nnn, list);
      }
      xassert(1 <= q && q <= n-m);
      /* compute q-th column of the simplex table */
      spx_eval_tcol(lp, q, tcol);
#if 0
      /* big := max(1, |tcol[1]|, ..., |tcol[m]|) */
      big = 1.0;
      for (i = 1; i <= m; i++)
      {  temp = tcol[i];
         if (temp < 0.0)
            temp = - temp;
         if (big < temp)
            big = temp;
      }
#else
      /* this still puzzles me */
      big = 1.0;
#endif
      /* choose basic variable xB[p] */
#if 1 /* 23/VI-2017 */
      if (csa->phase == 1 && csa->r_test == GLP_RT_FLIP && try <= 2)
      {  /* long-step ratio test */
         int t, num, num1;
         double slope, teta_lim;
         /* determine penalty function break points */
         nbp = spx_ls_eval_bp(lp, beta, q, d[q], tcol, tol_piv, bp);
         if (nbp < 2)
            goto skip;
         /* set initial slope */
         slope = - fabs(d[q]);
         /* estimate initial teta_lim */
         teta_lim = DBL_MAX;
         for (t = 1; t <= nbp; t++)
         {  if (teta_lim > bp[t].teta)
               teta_lim = bp[t].teta;
         }
         xassert(teta_lim >= 0.0);
         if (teta_lim < 1e-3)
            teta_lim = 1e-3;
         /* nothing has been chosen so far */
         t_best = 0, dz_best = 0.0, num = 0;
         /* choose appropriate break point */
         while (num < nbp)
         {  /* select and process a new portion of break points */
            num1 = spx_ls_select_bp(lp, tcol, nbp, bp, num, &slope,
               teta_lim);
            for (t = num+1; t <= num1; t++)
            {  int i = (bp[t].i >= 0 ? bp[t].i : -bp[t].i);
               xassert(0 <= i && i <= m);
               if (i == 0 || fabs(tcol[i]) / big >= MIN_RATIO)
               {  if (dz_best > bp[t].dz)
                     t_best = t, dz_best = bp[t].dz;
               }
#if 0
               if (i == 0)
               {  /* do not consider further break points beyond this
                   * point, where xN[q] reaches its opposite bound;
                   * in principle (see spx_ls_eval_bp), this break
                   * point should be the last one, however, due to
                   * round-off errors there may be other break points
                   * with the same teta beyond this one */
                  slope = +1.0;
               }
#endif
            }
            if (slope > 0.0)
            {  /* penalty function starts increasing */
               break;
            }
            /* penalty function continues decreasing */
            num = num1;
            teta_lim += teta_lim;
         }
         if (dz_best == 0.0)
            goto skip;
         /* the choice has been made */
         xassert(1 <= t_best && t_best <= num1);
         if (t_best == 1)
         {  /* the very first break point was chosen; it is reasonable
             * to use the short-step ratio test */
            goto skip;
         }
         csa->q = q;
         memcpy(&csa->tcol.vec[1], &tcol[1], m * sizeof(double));
         fvs_gather_vec(&csa->tcol, DBL_EPSILON);
         if (bp[t_best].i == 0)
         {  /* xN[q] goes to its opposite bound */
            csa->p = -1;
            csa->p_flag = 0;
            best_ratio = 1.0;
         }
         else if (bp[t_best].i > 0)
         {  /* xB[p] leaves the basis and goes to its lower bound */
            csa->p = + bp[t_best].i;
            xassert(1 <= csa->p && csa->p <= m);
            csa->p_flag = 0;
            best_ratio = fabs(tcol[csa->p]) / big;
         }
         else
         {  /* xB[p] leaves the basis and goes to its upper bound */
            csa->p = - bp[t_best].i;
            xassert(1 <= csa->p && csa->p <= m);
            csa->p_flag = 1;
            best_ratio = fabs(tcol[csa->p]) / big;
         }
#if 0
         xprintf("num1 = %d; t_best = %d; dz = %g\n", num1, t_best,
            bp[t_best].dz);
#endif
         ret = 1;
         goto done;
skip:    ;
      }
#endif
#if 0 /* 23/VI-2017 */
      if (!csa->harris)
#else
      if (csa->r_test == GLP_RT_STD)
#endif
      {  /* textbook ratio test */
         p = spx_chuzr_std(lp, csa->phase, beta, q,
            d[q] < 0.0 ? +1. : -1., tcol, &p_flag, tol_piv,
            .30 * csa->tol_bnd, .30 * csa->tol_bnd1);
      }
      else
      {  /* Harris' two-pass ratio test */
         p = spx_chuzr_harris(lp, csa->phase, beta, q,
            d[q] < 0.0 ? +1. : -1., tcol, &p_flag , tol_piv,
            .50 * csa->tol_bnd, .50 * csa->tol_bnd1);
      }
      if (p <= 0)
      {  /* primal unboundedness or special case */
         csa->q = q;
#if 0 /* 11/VI-2017 */
         memcpy(&csa->tcol[1], &tcol[1], m * sizeof(double));
#else
         memcpy(&csa->tcol.vec[1], &tcol[1], m * sizeof(double));
         fvs_gather_vec(&csa->tcol, DBL_EPSILON);
#endif
         csa->p = p;
         csa->p_flag = p_flag;
         best_ratio = 1.0;
         goto done;
      }
      /* either keep previous choice or accept new choice depending on
       * which one is better */
      if (best_ratio < fabs(tcol[p]) / big)
      {  csa->q = q;
#if 0 /* 11/VI-2017 */
         memcpy(&csa->tcol[1], &tcol[1], m * sizeof(double));
#else
         memcpy(&csa->tcol.vec[1], &tcol[1], m * sizeof(double));
         fvs_gather_vec(&csa->tcol, DBL_EPSILON);
#endif
         csa->p = p;
         csa->p_flag = p_flag;
         best_ratio = fabs(tcol[p]) / big;
      }
      /* check if the current choice is acceptable */
      if (best_ratio >= MIN_RATIO || nnn == 1 || try == 5)
         goto done;
      /* try to choose other xN[q] and xB[p] */
      /* find xN[q] in the list */
      for (t = 1; t <= nnn; t++)
         if (list[t] == q) break;
      xassert(t <= nnn);
      /* move xN[q] to the end of the list */
      list[t] = list[nnn], list[nnn] = q;
      /* and exclude it from consideration */
      nnn--;
      /* repeat the choice */
      goto try;
done: /* the choice has been made */
#if 1 /* FIXME: currently just to avoid badly conditioned basis */
      if (best_ratio < .001 * MIN_RATIO)
      {  /* looks like this helps */
         if (bfd_get_count(lp->bfd) > 0)
            return -1;
         /* didn't help; last chance to improve the choice */
         if (tol_piv == csa->tol_piv)
         {  tol_piv *= 1000.;
            goto more;
         }
      }
#endif
#if 0 /* 23/VI-2017 */
      return 0;
#else /* FIXME */
      if (ret)
      {  /* invalidate dual basic solution components */
         csa->d_st = 0;
         /* change penalty function coefficients at basic variables for
          * all break points preceding the chosen one */
         for (t = 1; t < t_best; t++)
         {  int i = (bp[t].i >= 0 ? bp[t].i : -bp[t].i);
            xassert(0 <= i && i <= m);
            if (i == 0)
            {  /* xN[q] crosses its opposite bound */
               xassert(1 <= csa->q && csa->q <= n-m);
               k = head[m+csa->q];
            }
            else
            {  /* xB[i] crosses its (lower or upper) bound */
               k = head[i]; /* x[k] = xB[i] */
            }
            c[k] += bp[t].dc;
            xassert(c[k] == 0.0 || c[k] == +1.0 || c[k] == -1.0);
         }
      }
      return ret;
#endif
}
#endif


/***********************************************************************
*  spx_change_basis - change current basis to adjacent one
*
*  This routine changes the current basis to the adjacent one making
*  necessary changes in lp->head and lp->flag members.
*
*  The parameters p, p_flag, and q have the same meaning as for the
*  routine spx_update_beta. */

void spx_change_basis_fpga(SPXLP *lp, int p, int p_flag, int q)
{     int m = lp->m;
      int n = lp->n;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      char *flag = lp->flag;
      int k;
      if (p < 0)
      {  /* special case: xN[q] goes to its opposite bound */
         xassert(1 <= q && q <= n-m);
         /* xN[q] should be double-bounded variable */
         k = head[m+q]; /* x[k] = xN[q] */
         xassert(l[k] != -DBL_MAX && u[k] != +DBL_MAX && l[k] != u[k]);
         /* change active bound flag */
         flag[q] = 1 - flag[q];
      }
      else
      {  /* xB[p] leaves the basis, xN[q] enters the basis */
         xassert(1 <= p && p <= m);
         xassert(p_flag == 0 || p_flag == 1);
         xassert(1 <= q && q <= n-m);
         k = head[p]; /* xB[p] = x[k] */
         if (p_flag)
         {  /* xB[p] goes to its upper bound */
            xassert(l[k] != u[k] && u[k] != +DBL_MAX);
         }
         /* swap xB[p] and xN[q] in the basis */
         head[p] = head[m+q], head[m+q] = k;
         /* and set active bound flag for new xN[q] */
         lp->flag[q] = p_flag;
      }
      return;
}


int bfd_update_fpga(BFD *bfd, int j, int len, const int ind[], const double
      val[])
{     /* update LP basis factorization */
      int ret;
      xassert(bfd->valid);
      switch (bfd->type)
      {  case 1:
            ret = fhvint_update(bfd->u.fhvi, j, len, ind, val);
#if 1 /* FIXME */
            switch (ret)
            {  case 0:
                  break;
               case 1:
                  ret = BFD_ESING;
                  break;
               case 2:
               case 3:
                  ret = BFD_ECOND;
                  break;
               case 4:
                  ret = BFD_ELIMIT;
                  break;
               case 5:
                  ret = BFD_ECHECK;
                  break;
               default:
                  xassert(ret != ret);
            }
#endif
            break;
         case 2:
            switch (bfd->parm.type & 0x0F)
            {  case GLP_BF_BG:
                  ret = scfint_update(bfd->u.scfi, 1, j, len, ind, val);
                  break;
               case GLP_BF_GR:
                  ret = scfint_update(bfd->u.scfi, 2, j, len, ind, val);
                  break;
               default:
                  xassert(bfd != bfd);
            }
#if 1 /* FIXME */
            switch (ret)
            {  case 0:
                  break;
               case 1:
                  ret = BFD_ELIMIT;
                  break;
               case 2:
                  ret = BFD_ECOND;
                  break;
               default:
                  xassert(ret != ret);
            }
#endif
            break;
         default:
            xassert(bfd != bfd);
      }
      if (ret != 0)
      {  /* updating factorization failed */
         bfd->valid = 0;
      }
#ifdef GLP_DEBUG
      /* save updated LP basis */
      {  SPME *e;
         int k;
         for (e = bfd->B->col[j]; e != NULL; e = e->c_next)
            e->val = 0.0;
         spm_drop_zeros(bfd->B, 0.0);
         for (k = 1; k <= len; k++)
            spm_new_elem(bfd->B, ind[k], j, val[k]);
      }
#endif
      if (ret == 0)
         bfd->upd_cnt++;
      return ret;
}

/***********************************************************************
*  spx_update_invb - update factorization of basis matrix
*
*  This routine updates factorization of the basis matrix B when i-th
*  column of B is replaced by k-th column of the constraint matrix A.
*
*  The parameter 1 <= i <= m specifies the number of column of matrix B
*  to be replaced by a new column.
*
*  The parameter 1 <= k <= n specifies the number of column of matrix A
*  to be used for replacement.
*
*  If the factorization has been successfully updated, the routine
*  validates it and returns zero. Otherwise, the routine invalidates
*  the factorization and returns the code provided by the factorization
*  driver (bfd_update). */

int spx_update_invb_fpga(SPXLP *lp, int i, int k)
{     int m = lp->m;
      int n = lp->n;
      int *A_ptr = lp->A_ptr;
      int *A_ind = lp->A_ind;
      double *A_val = lp->A_val;
      int ptr, len, ret;
      xassert(1 <= i && i <= m);
      xassert(1 <= k && k <= n);
      ptr = A_ptr[k];
      len = A_ptr[k+1] - ptr;
      ret = bfd_update_fpga(lp->bfd, i, len, &A_ind[ptr-1], &A_val[ptr-1]);
      lp->valid = (ret == 0);
      return ret;
}


/***********************************************************************
*  spx_factorize - compute factorization of current basis matrix
*
*  This routine computes factorization of the current basis matrix B.
*
*  If the factorization has been successfully computed, the routine
*  validates it and returns zero. Otherwise, the routine invalidates
*  the factorization and returns the code provided by the factorization
*  driver (bfd_factorize). */

static int jth_col_fpga(void *info, int j, int ind[], double val[])
{     /* provide column B[j] */
      SPXLP *lp = info;
      int m = lp->m;
      int *A_ptr = lp->A_ptr;
      int *head = lp->head;
      int k, ptr, len;
      xassert(1 <= j && j <= m);
      k = head[j]; /* x[k] = xB[j] */
      ptr = A_ptr[k];
      len = A_ptr[k+1] - ptr;
      memcpy(&ind[1], &lp->A_ind[ptr], len * sizeof(int));
      memcpy(&val[1], &lp->A_val[ptr], len * sizeof(double));
      return len;
}

int spx_factorize_fpga(SPXLP *lp)
{     int ret;
      ret = bfd_factorize(lp->bfd, lp->m, jth_col_fpga, lp);
      lp->valid = (ret == 0);
      return ret;
}


/***********************************************************************
*  luf_vt_solve - solve system V' * x = b
*
*  This routine solves the system V' * x = b, where V' is a matrix
*  transposed to the matrix V, which is the right factor of the sparse
*  LU-factorization.
*
*  On entry the array b should contain elements of the right-hand side
*  vector b in locations b[1], ..., b[n], where n is the order of the
*  matrix V. On exit the array x will contain elements of the solution
*  vector x in locations x[1], ..., x[n]. Note that the array b will be
*  clobbered on exit. */

void luf_vt_solve_fpga(LUF *luf, double b[/*1+n*/], double x[/*1+n*/])
{     int n = luf->n;
      SVA *sva = luf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      double *vr_piv = luf->vr_piv;
      int vr_ref = luf->vr_ref;
      int *vr_ptr = &sva->ptr[vr_ref-1];
      int *vr_len = &sva->len[vr_ref-1];
      int *pp_inv = luf->pp_inv;
      int *qq_ind = luf->qq_ind;
      int i, j, k, ptr, end;
      double x_i;
      for (k = 1; k <= n; k++)
      {  /* k-th row of U' = j-th column of V */
         /* k-th column of U' = i-th row of V */
         i = pp_inv[k];
         j = qq_ind[k];
         /* compute x[i] = b[j] / u'[k,k], where u'[k,k] = v[i,j];
          * walk through i-th row of matrix V and substitute x[i] into
          * other equations */
         if ((x_i = x[i] = b[j] / vr_piv[i]) != 0.0)
         {  for (end = (ptr = vr_ptr[i]) + vr_len[i]; ptr < end; ptr++)
               b[sv_ind[ptr]] -= sv_val[ptr] * x_i;
         }
      }
      return;
}


/***********************************************************************
*  fhv_ht_solve - solve system H' * x = b
*
*  This routine solves the system H' * x = b, where H' is a matrix
*  transposed to the matrix H, which is the middle factor of the sparse
*  updatable FHV-factorization.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n], where n is the order of the
*  matrix H. On exit this array will contain elements of the solution
*  vector x in the same locations. */

void fhv_ht_solve_fpga(FHV *fhv, double x[/*1+n*/])
{     SVA *sva = fhv->luf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int nfs = fhv->nfs;
      int *hh_ind = fhv->hh_ind;
      int hh_ref = fhv->hh_ref;
      int *hh_ptr = &sva->ptr[hh_ref-1];
      int *hh_len = &sva->len[hh_ref-1];
      int k, end, ptr;
      double x_j;
      for (k = nfs; k >= 1; k--)
      {  if ((x_j = x[hh_ind[k]]) == 0.0)
            continue;
         for (end = (ptr = hh_ptr[k]) + hh_len[k]; ptr < end; ptr++)
            x[sv_ind[ptr]] -= sv_val[ptr] * x_j;
      }
      return;
}

void fhvint_btran_fpga(FHVINT *fi, double x[])
{     /* solve system A'* x = b */
      FHV *fhv = &fi->fhv;
      LUF *luf = fhv->luf;
      int n = luf->n;
      int *pp_ind = luf->pp_ind;
      int *pp_inv = luf->pp_inv;
      SGF *sgf = fi->lufi->sgf;
      double *work = sgf->work;
      xassert(fi->valid);
      /* A' = (F * H * V)' = V'* H'* F' */
      /* x = inv(A') * b = inv(F') * inv(H') * inv(V') * b */
      luf_vt_solve_fpga(luf, x, work);
      fhv_ht_solve_fpga(fhv, work);
      luf->pp_ind = fhv->p0_ind;
      luf->pp_inv = fhv->p0_inv;
      luf_ft_solve(luf, work);
      luf->pp_ind = pp_ind;
      luf->pp_inv = pp_inv;
      memcpy(&x[1], &work[1], n * sizeof(double));
      return;
}

/***********************************************************************
*  btf_a_solve - solve system A * x = b
*
*  This routine solves the system A * x = b, where A is the original
*  matrix.
*
*  On entry the array b should contain elements of the right-hand size
*  vector b in locations b[1], ..., b[n], where n is the order of the
*  matrix A. On exit the array x will contain elements of the solution
*  vector in locations x[1], ..., x[n]. Note that the array b will be
*  clobbered on exit.
*
*  The routine also uses locations [1], ..., [max_size] of two working
*  arrays w1 and w2, where max_size is the maximal size of diagonal
*  blocks in BT-factorization (max_size <= n). */

void btf_a_solve_fpga(BTF *btf, double b[/*1+n*/], double x[/*1+n*/],
      double w1[/*1+n*/], double w2[/*1+n*/])
{     SVA *sva = btf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int *pp_inv = btf->pp_inv;
      int *qq_ind = btf->qq_ind;
      int num = btf->num;
      int *beg = btf->beg;
      int ac_ref = btf->ac_ref;
      int *ac_ptr = &sva->ptr[ac_ref-1];
      int *ac_len = &sva->len[ac_ref-1];
      double *bb = w1;
      double *xx = w2;
      LUF luf;
      int i, j, jj, k, beg_k, flag;
      double t;
      for (k = num; k >= 1; k--)
      {  /* determine order of diagonal block A~[k,k] */
         luf.n = beg[k+1] - (beg_k = beg[k]);
         if (luf.n == 1)
         {  /* trivial case */
            /* solve system A~[k,k] * X[k] = B[k] */
            t = x[qq_ind[beg_k]] =
               b[pp_inv[beg_k]] / btf->vr_piv[beg_k];
            /* substitute X[k] into other equations */
            if (t != 0.0)
            {  int ptr = ac_ptr[qq_ind[beg_k]];
               int end = ptr + ac_len[qq_ind[beg_k]];
               for (; ptr < end; ptr++)
                  b[sv_ind[ptr]] -= sv_val[ptr] * t;
            }
         }
         else
         {  /* general case */
            /* construct B[k] */
            flag = 0;
            for (i = 1; i <= luf.n; i++)
            {  if ((bb[i] = b[pp_inv[i + (beg_k-1)]]) != 0.0)
                  flag = 1;
            }
            /* solve system A~[k,k] * X[k] = B[k] */
            if (!flag)
            {  /* B[k] = 0, so X[k] = 0 */
               for (j = 1; j <= luf.n; j++)
                  x[qq_ind[j + (beg_k-1)]] = 0.0;
               continue;
            }
            luf.sva = sva;
            luf.fr_ref = btf->fr_ref + (beg_k-1);
            luf.fc_ref = btf->fc_ref + (beg_k-1);
            luf.vr_ref = btf->vr_ref + (beg_k-1);
            luf.vr_piv = btf->vr_piv + (beg_k-1);
            luf.vc_ref = btf->vc_ref + (beg_k-1);
            luf.pp_ind = btf->p1_ind + (beg_k-1);
            luf.pp_inv = btf->p1_inv + (beg_k-1);
            luf.qq_ind = btf->q1_ind + (beg_k-1);
            luf.qq_inv = btf->q1_inv + (beg_k-1);
            luf_f_solve_fpga(&luf, bb);
            luf_v_solve_fpga(&luf, bb, xx);
            /* store X[k] and substitute it into other equations */
            for (j = 1; j <= luf.n; j++)
            {  jj = j + (beg_k-1);
               t = x[qq_ind[jj]] = xx[j];
               if (t != 0.0)
               {  int ptr = ac_ptr[qq_ind[jj]];
                  int end = ptr + ac_len[qq_ind[jj]];
                  for (; ptr < end; ptr++)
                     b[sv_ind[ptr]] -= sv_val[ptr] * t;
               }
            }
         }
      }
      return;
}

/***********************************************************************
*  btf_at_solve - solve system A'* x = b
*
*  This routine solves the system A'* x = b, where A' is a matrix
*  transposed to the original matrix A.
*
*  On entry the array b should contain elements of the right-hand size
*  vector b in locations b[1], ..., b[n], where n is the order of the
*  matrix A. On exit the array x will contain elements of the solution
*  vector in locations x[1], ..., x[n]. Note that the array b will be
*  clobbered on exit.
*
*  The routine also uses locations [1], ..., [max_size] of two working
*  arrays w1 and w2, where max_size is the maximal size of diagonal
*  blocks in BT-factorization (max_size <= n). */

void btf_at_solve_fpga(BTF *btf, double b[/*1+n*/], double x[/*1+n*/],
      double w1[/*1+n*/], double w2[/*1+n*/])
{     SVA *sva = btf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int *pp_inv = btf->pp_inv;
      int *qq_ind = btf->qq_ind;
      int num = btf->num;
      int *beg = btf->beg;
      int ar_ref = btf->ar_ref;
      int *ar_ptr = &sva->ptr[ar_ref-1];
      int *ar_len = &sva->len[ar_ref-1];
      double *bb = w1;
      double *xx = w2;
      LUF luf;
      int i, j, jj, k, beg_k, flag;
      double t;
      for (k = 1; k <= num; k++)
      {  /* determine order of diagonal block A~[k,k] */
         luf.n = beg[k+1] - (beg_k = beg[k]);
         if (luf.n == 1)
         {  /* trivial case */
            /* solve system A~'[k,k] * X[k] = B[k] */
            t = x[pp_inv[beg_k]] =
               b[qq_ind[beg_k]] / btf->vr_piv[beg_k];
            /* substitute X[k] into other equations */
            if (t != 0.0)
            {  int ptr = ar_ptr[pp_inv[beg_k]];
               int end = ptr + ar_len[pp_inv[beg_k]];
               for (; ptr < end; ptr++)
                  b[sv_ind[ptr]] -= sv_val[ptr] * t;
            }
         }
         else
         {  /* general case */
            /* construct B[k] */
            flag = 0;
            for (i = 1; i <= luf.n; i++)
            {  if ((bb[i] = b[qq_ind[i + (beg_k-1)]]) != 0.0)
                  flag = 1;
            }
            /* solve system A~'[k,k] * X[k] = B[k] */
            if (!flag)
            {  /* B[k] = 0, so X[k] = 0 */
               for (j = 1; j <= luf.n; j++)
                  x[pp_inv[j + (beg_k-1)]] = 0.0;
               continue;
            }
            luf.sva = sva;
            luf.fr_ref = btf->fr_ref + (beg_k-1);
            luf.fc_ref = btf->fc_ref + (beg_k-1);
            luf.vr_ref = btf->vr_ref + (beg_k-1);
            luf.vr_piv = btf->vr_piv + (beg_k-1);
            luf.vc_ref = btf->vc_ref + (beg_k-1);
            luf.pp_ind = btf->p1_ind + (beg_k-1);
            luf.pp_inv = btf->p1_inv + (beg_k-1);
            luf.qq_ind = btf->q1_ind + (beg_k-1);
            luf.qq_inv = btf->q1_inv + (beg_k-1);
            luf_vt_solve_fpga(&luf, bb, xx);
            luf_ft_solve(&luf, xx);
            /* store X[k] and substitute it into other equations */
            for (j = 1; j <= luf.n; j++)
            {  jj = j + (beg_k-1);
               t = x[pp_inv[jj]] = xx[j];
               if (t != 0.0)
               {  int ptr = ar_ptr[pp_inv[jj]];
                  int end = ptr + ar_len[pp_inv[jj]];
                  for (; ptr < end; ptr++)
                     b[sv_ind[ptr]] -= sv_val[ptr] * t;
               }
            }
         }
      }
      return;
}


/***********************************************************************
*  luf_v_solve - solve system V * x = b
*
*  This routine solves the system V * x = b, where the matrix V is the
*  right factor of the sparse LU-factorization.
*
*  On entry the array b should contain elements of the right-hand side
*  vector b in locations b[1], ..., b[n], where n is the order of the
*  matrix V. On exit the array x will contain elements of the solution
*  vector x in locations x[1], ..., x[n]. Note that the array b will be
*  clobbered on exit. */

void luf_v_solve_fpga(LUF *luf, double b[/*1+n*/], double x[/*1+n*/])
{     int n = luf->n;
      SVA *sva = luf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      double *vr_piv = luf->vr_piv;
      int vc_ref = luf->vc_ref;
      int *vc_ptr = &sva->ptr[vc_ref-1];
      int *vc_len = &sva->len[vc_ref-1];
      int *pp_inv = luf->pp_inv;
      int *qq_ind = luf->qq_ind;
      int i, j, k, ptr, end;
      double x_j;
      for (k = n; k >= 1; k--)
      {  /* k-th row of U = i-th row of V */
         /* k-th column of U = j-th column of V */
         i = pp_inv[k];
         j = qq_ind[k];
         /* compute x[j] = b[i] / u[k,k], where u[k,k] = v[i,j];
          * walk through j-th column of matrix V and substitute x[j]
          * into other equations */
         if ((x_j = x[j] = b[i] / vr_piv[i]) != 0.0)
         {  for (end = (ptr = vc_ptr[j]) + vc_len[j]; ptr < end; ptr++)
               b[sv_ind[ptr]] -= sv_val[ptr] * x_j;
         }
      }
      return;
}


/***********************************************************************
*  scf_s0_solve - solve system S0 * x = b or S0'* x = b
*
*  This routine solves the system S0 * x = b (if tr is zero) or the
*  system S0'* x = b (if tr is non-zero), where S0 is the right factor
*  of the initial matrix A0 = R0 * S0.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n0], where n0 is the order of the
*  matrix S0. On exit the array x will contain elements of the solution
*  vector in the same locations.
*
*  The routine uses locations [1], ..., [n0] of three working arrays
*  w1, w2, and w3. (In case of type = 1 arrays w2 and w3 are not used
*  and can be specified as NULL.) */

void scf_s0_solve_fpga(SCF *scf, int tr, double x[/*1+n0*/],
      double w1[/*1+n0*/], double w2[/*1+n0*/], double w3[/*1+n0*/])
{     int n0 = scf->n0;
      switch (scf->type)
      {  case 1:
            /* A0 = F0 * V0, so S0 = V0 */
            if (!tr)
               luf_v_solve_fpga(scf->a0.luf, x, w1);
            else
               luf_vt_solve_fpga(scf->a0.luf, x, w1);
            break;
         case 2:
            /* A0 = I * A0, so S0 = A0 */
            if (!tr)
               btf_a_solve_fpga(scf->a0.btf, x, w1, w2, w3);
            else
               btf_at_solve_fpga(scf->a0.btf, x, w1, w2, w3);
            break;
         default:
            xassert(scf != scf);
      }
      memcpy(&x[1], &w1[1], n0 * sizeof(double));
      return;
}

/***********************************************************************
*  scf_st_prod - compute product y := y + alpha * S'* x
*
*  This routine computes the product y := y + alpha * S'* x, where
*  S' is a matrix transposed to S, x is a n0-vector, alpha is a scalar,
*  y is a nn-vector.
*
*  Since matrix S is available by columns, the product components are
*  computed as inner products:
*
*     y[j] := y[j] + alpha * (j-th column of S) * x
*
*  for j = 1, 2, ..., nn. */

void scf_st_prod_fpga(SCF *scf, double y[/*1+nn*/], double a, const double
      x[/*1+n0*/])
{     int nn = scf->nn;
      SVA *sva = scf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int ss_ref = scf->ss_ref;
      int *ss_ptr = &sva->ptr[ss_ref-1];
      int *ss_len = &sva->len[ss_ref-1];
      int j, ptr, end;
      double t;
      for (j = 1; j <= nn; j++)
      {  /* t := (j-th column of S) * x */
         t = 0.0;
         for (end = (ptr = ss_ptr[j]) + ss_len[j]; ptr < end; ptr++)
            t += sv_val[ptr] * x[sv_ind[ptr]];
         /* y[j] := y[j] + alpha * t */
         y[j] += a * t;
      }
      return;
}

/***********************************************************************
*  ifu_at_solve - solve system A'* x = b
*
*  This routine solves the system A'* x = b, where A' is a matrix
*  transposed to the matrix A, specified by its IFU-factorization.
*
*  Using the main equality F * A = U, from which it follows that
*  A'* F' = U', we have:
*
*     A'* x = b  =>  A'* F'* inv(F') * x = b  =>
*
*     U'* inv(F') * x = b  =>  inv(F') * x = inv(U') * b  =>
*
*     x = F' * inv(U') * b.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n], where n is the order of the
*  matrix A. On exit this array will contain elements of the solution
*  vector x in the same locations.
*
*  The working array w should have at least 1+n elements (0-th element
*  is not used). */

void ifu_at_solve_fpga(IFU *ifu, double x[/*1+n*/], double w[/*1+n*/])
{     /* non-optimized version */
      int n_max = ifu->n_max;
      int n = ifu->n;
      double *f_ = ifu->f;
      double *u_ = ifu->u;
      int i, j;
      double t;
#     define f(i,j) f_[(i)*n_max+(j)]
#     define u(i,j) u_[(i)*n_max+(j)]
      xassert(0 <= n && n <= n_max);
      /* adjust indexing */
      x++, w++;
      /* y := inv(U') * b */
      for (i = 0; i < n; i++)
      {  t = (x[i] /= u(i,i));
         for (j = i+1; j < n; j++)
            x[j] -= u(i,j) * t;
      }
      /* x := F'* y */
      for (j = 0; j < n; j++)
      {  /* x[j] := (j-th column of F) * y */
         t = 0.0;
         for (i = 0; i < n; i++)
            t += f(i,j) * x[i];
         w[j] = t;
      }
      memcpy(x, w, n * sizeof(double));
#     undef f
#     undef u
      return;
}

/***********************************************************************
*  scf_rt_prod - compute product y := y + alpha * R'* x
*
*  This routine computes the product y := y + alpha * R'* x, where
*  R' is a matrix transposed to R, x is a nn-vector, alpha is a scalar,
*  y is a n0-vector.
*
*  Since matrix R is available by rows, the product is computed as a
*  linear combination:
*
*     y := y + alpha * (R'[1] * x[1] + ... + R'[nn] * x[nn]),
*
*  where R'[i] is i-th row of R. */

void scf_rt_prod_fpga(SCF *scf, double y[/*1+n0*/], double a, const double
      x[/*1+nn*/])
{     int nn = scf->nn;
      SVA *sva = scf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int rr_ref = scf->rr_ref;
      int *rr_ptr = &sva->ptr[rr_ref-1];
      int *rr_len = &sva->len[rr_ref-1];
      int i, ptr, end;
      double t;
      for (i = 1; i <= nn; i++)
      {  if (x[i] == 0.0)
            continue;
         /* y := y + alpha * R'[i] * x[i] */
         t = a * x[i];
         for (end = (ptr = rr_ptr[i]) + rr_len[i]; ptr < end; ptr++)
            y[sv_ind[ptr]] += sv_val[ptr] * t;
      }
      return;
}

/***********************************************************************
*  scf_r0_solve - solve system R0 * x = b or R0'* x = b
*
*  This routine solves the system R0 * x = b (if tr is zero) or the
*  system R0'* x = b (if tr is non-zero), where R0 is the left factor
*  of the initial matrix A0 = R0 * S0.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n0], where n0 is the order of the
*  matrix R0. On exit the array x will contain elements of the solution
*  vector in the same locations. */

void scf_r0_solve_fpga(SCF *scf, int tr, double x[/*1+n0*/])
{     switch (scf->type)
      {  case 1:
            /* A0 = F0 * V0, so R0 = F0 */
            if (!tr)
               luf_f_solve_fpga(scf->a0.luf, x);
            else
               luf_ft_solve(scf->a0.luf, x);
            break;
         case 2:
            /* A0 = I * A0, so R0 = I */
            break;
         default:
            xassert(scf != scf);
      }
      return;
}


/***********************************************************************
*  scf_at_solve - solve system A'* x = b
*
*  This routine solves the system A'* x = b, where A' is a matrix
*  transposed to the current matrix A.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n], where n is the order of the
*  matrix A. On exit the array x will contain elements of the solution
*  vector in the same locations.
*
*  For details see the program documentation. */

void scf_at_solve_fpga(SCF *scf, double x[/*1+n*/],
      double w[/*1+n0+nn*/], double work1[/*1+max(n0,nn)*/],
      double work2[/*1+n*/], double work3[/*1+n*/])
{     int n = scf->n;
      int n0 = scf->n0;
      int nn = scf->nn;
      int *pp_inv = scf->pp_inv;
      int *qq_ind = scf->qq_ind;
      int i, ii;
      /* (u1, u2) := Q * (b, 0) */
      for (ii = 1; ii <= n0+nn; ii++)
      {  i = qq_ind[ii];
         w[ii] = (i <= n ? x[i] : 0.0);
      }
      /* v1 := inv(S0') * u1 */
      scf_s0_solve_fpga(scf, 1, &w[0], work1, work2, work3);
      /* v2 := inv(C') * (u2 - S'* v1) */
      scf_st_prod_fpga(scf, &w[n0], -1.0, &w[0]);
      ifu_at_solve_fpga(&scf->ifu, &w[n0], work1);
      /* w2 := v2 */
      /* nop */
      /* w1 := inv(R0') * (v1 - R'* w2) */
      scf_rt_prod_fpga(scf, &w[0], -1.0, &w[n0]);
      scf_r0_solve_fpga(scf, 1, &w[0]);
      /* compute (x, x~) := P * (w1, w2); x~ is not needed */
      for (i = 1; i <= n; i++)
      {
#if 1 /* FIXME: currently P = I */
         xassert(pp_inv[i] == i);
#endif
         x[i] = w[pp_inv[i]];
      }
      return;
}

void scfint_btran_fpga(SCFINT *fi, double x[])
{     /* solve system A'* x = b */
      xassert(fi->valid);
      scf_at_solve_fpga(&fi->scf, x, fi->w4, fi->w5, fi->w1, fi->w2);
      return;
}

void bfd_btran_fpga(BFD *bfd, double x[])
{     /* perform backward transformation (solve system B'* x = b) */
#ifdef GLP_DEBUG
      SPM *B = bfd->B;
      int m = B->m;
      double *b = talloc(1+m, double);
      SPME *e;
      int k;
      double s, relerr, maxerr;
      for (k = 1; k <= m; k++)
         b[k] = x[k];
#endif
      xassert(bfd->valid);
      switch (bfd->type)
      {  case 1:
            fhvint_btran_fpga(bfd->u.fhvi, x);
            break;
         case 2:
            scfint_btran_fpga(bfd->u.scfi, x);
            break;
         default:
            xassert(bfd != bfd);
      }
#ifdef GLP_DEBUG
      maxerr = 0.0;
      for (k = 1; k <= m; k++)
      {  s = 0.0;
         for (e = B->col[k]; e != NULL; e = e->c_next)
            s += e->val * x[e->i];
         relerr = (b[k] - s) / (1.0 + fabs(b[k]));
         if (maxerr < relerr)
            maxerr = relerr;
      }
      if (maxerr > 1e-8)
         xprintf("bfd_btran: maxerr = %g; relative error too large\n",
            maxerr);
      tfree(b);
#endif
      return;
}


/***********************************************************************
*  spx_update_gamma - update projected steepest edge weights exactly
*
*  This routine updates the vector gamma = (gamma[j]) of projected
*  steepest edge weights exactly, for the adjacent basis.
*
*  On entry to the routine the content of the se object should be valid
*  and should correspond to the current basis.
*
*  The parameter 1 <= p <= m specifies basic variable xB[p] which
*  becomes non-basic variable xN[q] in the adjacent basis.
*
*  The parameter 1 <= q <= n-m specified non-basic variable xN[q] which
*  becomes basic variable xB[p] in the adjacent basis.
*
*  It is assumed that the array trow contains elements of p-th (pivot)
*  row T'[p] of the simplex table in locations trow[1], ..., trow[n-m].
*  It is also assumed that the array tcol contains elements of q-th
*  (pivot) column T[q] of the simple table in locations tcol[1], ...,
*  tcol[m]. (These row and column should be computed for the current
*  basis.)
*
*  For details about the formulae used see the program documentation.
*
*  The routine also computes the relative error:
*
*     e = |gamma[q] - gamma'[q]| / (1 + |gamma[q]|),
*
*  where gamma'[q] is the weight for xN[q] on entry to the routine,
*  and returns e on exit. (If e happens to be large enough, the calling
*  program may reset the reference space, since other weights also may
*  be inaccurate.) */

double spx_update_gamma_fpga(SPXLP *lp, SPXSE *se, int p, int q,
      const double trow[/*1+n-m*/], const double tcol[/*1+m*/])
{     int m = lp->m;
      int n = lp->n;
      int *head = lp->head;
      char *refsp = se->refsp;
      double *gamma = se->gamma;
      double *u = se->work;
      int i, j, k, ptr, end;
      double gamma_q, delta_q, e, r, s, t1, t2;
      xassert(se->valid);
      xassert(1 <= p && p <= m);
      xassert(1 <= q && q <= n-m);
      /* compute gamma[q] in current basis more accurately; also
       * compute auxiliary vector u */
      k = head[m+q]; /* x[k] = xN[q] */
      gamma_q = delta_q = (refsp[k] ? 1.0 : 0.0);
      for (i = 1; i <= m; i++)
      {  k = head[i]; /* x[k] = xB[i] */
         if (refsp[k])
         {  gamma_q += tcol[i] * tcol[i];
            u[i] = tcol[i];
         }
         else
            u[i] = 0.0;
      }
      bfd_btran_fpga(lp->bfd, u);
      /* compute relative error in gamma[q] */
      e = fabs(gamma_q - gamma[q]) / (1.0 + gamma_q);
      /* compute new gamma[q] */
      gamma[q] = gamma_q / (tcol[p] * tcol[p]);
      /* compute new gamma[j] for all j != q */
      for (j = 1; j <= n-m; j++)
      {  if (j == q)
            continue;
         if (-1e-9 < trow[j] && trow[j] < +1e-9)
         {  /* T[p,j] is close to zero; gamma[j] is not changed */
            continue;
         }
         /* compute r[j] = T[p,j] / T[p,q] */
         r = trow[j] / tcol[p];
         /* compute inner product s[j] = N'[j] * u, where N[j] = A[k]
          * is constraint matrix column corresponding to xN[j] */
         s = 0.0;
         k = head[m+j]; /* x[k] = xN[j] */
         ptr = lp->A_ptr[k];
         end = lp->A_ptr[k+1];
         for (; ptr < end; ptr++)
            s += lp->A_val[ptr] * u[lp->A_ind[ptr]];
         /* compute new gamma[j] */
         t1 = gamma[j] + r * (r * gamma_q + s + s);
         t2 = (refsp[k] ? 1.0 : 0.0) + delta_q * r * r;
         gamma[j] = (t1 >= t2 ? t1 : t2);
      }
      return e;
}


#if 1 /* 30/III-2016 */
double spx_update_d_s_fpga(SPXLP *lp, double d[/*1+n-m*/], int p, int q,
      const FVS *trow, const FVS *tcol)
{     /* sparse version of spx_update_d */
      int m = lp->m;
      int n = lp->n;
      double *c = lp->c;
      int *head = lp->head;
      int trow_nnz = trow->nnz;
      int *trow_ind = trow->ind;
      double *trow_vec = trow->vec;
      int tcol_nnz = tcol->nnz;
      int *tcol_ind = tcol->ind;
      double *tcol_vec = tcol->vec;
      int i, j, k;
      double dq, e;
      xassert(1 <= p && p <= m);
      xassert(1 <= q && q <= n);
      xassert(trow->n == n-m);
      xassert(tcol->n == m);
      /* compute d[q] in current basis more accurately */
      k = head[m+q]; /* x[k] = xN[q] */
      dq = c[k];
      for (k = 1; k <= tcol_nnz; k++)
      {  i = tcol_ind[k];
         dq += tcol_vec[i] * c[head[i]];
      }
      /* compute relative error in d[q] */
      e = fabs(dq - d[q]) / (1.0 + fabs(dq));
      /* compute new d[q], which is the reduced cost of xB[p] in the
       * adjacent basis */
      d[q] = (dq /= tcol_vec[p]);
      /* compute new d[j] for all j != q */
      for (k = 1; k <= trow_nnz; k++)
      {  j = trow_ind[k];
         if (j != q)
            d[j] -= trow_vec[j] * dq;
      }
      return e;
}
#endif

void spx_eval_trow1_fpga(SPXLP *lp, SPXAT *at, const double rho[/*1+m*/],
      double trow[/*1+n-m*/])
{     int m = lp->m;
      int n = lp->n;
      int nnz = lp->nnz;
      int i, j, nnz_rho;
      double cnt1, cnt2;
      /* determine nnz(rho) */
      nnz_rho = 0;
      for (i = 1; i <= m; i++)
      {  if (rho[i] != 0.0)
            nnz_rho++;
      }
      /* estimate the number of operations for both ways */
      cnt1 = (double)(n - m) * ((double)nnz / (double)n);
      cnt2 = (double)nnz_rho * ((double)nnz / (double)m);
      /* compute i-th row of simplex table */
      if (cnt1 < cnt2)
      {  /* as inner products */
         int *A_ptr = lp->A_ptr;
         int *A_ind = lp->A_ind;
         double *A_val = lp->A_val;
         int *head = lp->head;
         int k, ptr, end;
         double tij;
         for (j = 1; j <= n-m; j++)
         {  k = head[m+j]; /* x[k] = xN[j] */
            /* compute t[i,j] = - N'[j] * pi */
            tij = 0.0;
            ptr = A_ptr[k];
            end = A_ptr[k+1];
            for (; ptr < end; ptr++)
               tij -= A_val[ptr] * rho[A_ind[ptr]];
            trow[j] = tij;
         }
      }
      else
      {  /* as linear combination */
         spx_nt_prod1(lp, at, trow, 1, -1.0, rho);
      }
      return;
}

/***********************************************************************
*  spx_nt_prod - compute product y := y + s * N'* x
*
*  This routine computes the product:
*
*     y := y + s * N'* x,
*
*  where N' is a matrix transposed to the mx(n-m)-matrix N composed
*  from non-basic columns of the constraint matrix A, x is a m-vector,
*  s is a scalar, y is (n-m)-vector.
*
*  If the flag ign is non-zero, the routine ignores the input content
*  of the array y assuming that y = 0.
*
*  The routine uses the row-wise representation of the matrix N and
*  computes the product as a linear combination:
*
*     y := y + s * (N'[1] * x[1] + ... + N'[m] * x[m]),
*
*  where N'[i] is i-th row of N, 1 <= i <= m. */

void spx_nt_prod_fpga(SPXLP *lp, SPXNT *nt, double y[/*1+n-m*/], int ign,
      double s, const double x[/*1+m*/])
{     int m = lp->m;
      int n = lp->n;
      int *NT_ptr = nt->ptr;
      int *NT_len = nt->len;
      int *NT_ind = nt->ind;
      double *NT_val = nt->val;
      int i, j, ptr, end;
      double t;
      if (ign)
      {  /* y := 0 */
         for (j = 1; j <= n-m; j++)
            y[j] = 0.0;
      }
      for (i = 1; i <= m; i++)
      {  if (x[i] != 0.0)
         {  /* y := y + s * (i-th row of N) * x[i] */
            t = s * x[i];
            ptr = NT_ptr[i];
            end = ptr + NT_len[i];
            for (; ptr < end; ptr++)
               y[NT_ind[ptr]] += NT_val[ptr] * t;
         }
      }
      return;
}

void fvs_gather_vec_fpga(FVS *x, double eps)
{     /* gather sparse vector */
      int n = x->n;
      int *ind = x->ind;
      double *vec = x->vec;
      int j, nnz = 0;
      for (j = n; j >= 1; j--)
      {  if (-eps < vec[j] && vec[j] < +eps)
            vec[j] = 0.0;
         else
            ind[++nnz] = j;
      }
      x->nnz = nnz;
      return;
}


/***********************************************************************
*  spx_reset_refsp - reset reference space
*
*  This routine resets (re-initializes) the reference space composing
*  it from variables which are non-basic in the current basis, and sets
*  all weights gamma[j] to 1. */

void spx_reset_refsp_fpga(SPXLP *lp, SPXSE *se)
{     int m = lp->m;
      int n = lp->n;
      int *head = lp->head;
      char *refsp = se->refsp;
      double *gamma = se->gamma;
      int j, k;
      se->valid = 1;
      memset(&refsp[1], 0, n * sizeof(char));
      for (j = 1; j <= n-m; j++)
      {  k = head[m+j]; /* x[k] = xN[j] */
         refsp[k] = 1;
         gamma[j] = 1.0;
      }
      return;
}


/***********************************************************************
*  spx_eval_dj - compute reduced cost of j-th non-basic variable
*
*  This routine computes reduced cost d[j] of non-basic variable
*  xN[j] = x[k], 1 <= j <= n-m, in the current basic solution:
*
*     d[j] = c[k] - A'[k] * pi,
*
*  where c[k] is the objective coefficient at x[k], A[k] is k-th column
*  of the constraint matrix, pi is the vector of simplex multipliers in
*  the current basis.
*
*  It as assumed that components of the vector pi are stored in the
*  array locations pi[1], ..., pi[m]. */

double spx_eval_dj_fpga(SPXLP *lp, const double pi[/*1+m*/], int j)
{     int m = lp->m;
      int n = lp->n;
      int *A_ptr = lp->A_ptr;
      int *A_ind = lp->A_ind;
      double *A_val = lp->A_val;
      int k, ptr, end;
      double dj;
      xassert(1 <= j && j <= n-m);
      k = lp->head[m+j]; /* x[k] = xN[j] */
      /* dj := c[k] */
      dj = lp->c[k];
      /* dj := dj - A'[k] * pi */
      ptr = A_ptr[k];
      end = A_ptr[k+1];
      for (; ptr < end; ptr++)
         dj -= A_val[ptr] * pi[A_ind[ptr]];
      return dj;
}


/***********************************************************************
*  fhv_h_solve - solve system H * x = b
*
*  This routine solves the system H * x = b, where the matrix H is the
*  middle factor of the sparse updatable FHV-factorization.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n], where n is the order of the
*  matrix H. On exit this array will contain elements of the solution
*  vector x in the same locations. */

void fhv_h_solve_fpga(FHV *fhv, double x[/*1+n*/])
{     SVA *sva = fhv->luf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int nfs = fhv->nfs;
      int *hh_ind = fhv->hh_ind;
      int hh_ref = fhv->hh_ref;
      int *hh_ptr = &sva->ptr[hh_ref-1];
      int *hh_len = &sva->len[hh_ref-1];
      int i, k, end, ptr;
      double x_i;
      for (k = 1; k <= nfs; k++)
      {  x_i = x[i = hh_ind[k]];
         for (end = (ptr = hh_ptr[k]) + hh_len[k]; ptr < end; ptr++)
            x_i -= sv_val[ptr] * x[sv_ind[ptr]];
         x[i] = x_i;
      }
      return;
}


void fhvint_ftran_fpga(FHVINT *fi, double x[])
{     /* solve system A * x = b */
      FHV *fhv = &fi->fhv;
      LUF *luf = fhv->luf;
      int n = luf->n;
      int *pp_ind = luf->pp_ind;
      int *pp_inv = luf->pp_inv;
      SGF *sgf = fi->lufi->sgf;
      double *work = sgf->work;
      xassert(fi->valid);
      /* A = F * H * V */
      /* x = inv(A) * b = inv(V) * inv(H) * inv(F) * b */
      luf->pp_ind = fhv->p0_ind;
      luf->pp_inv = fhv->p0_inv;
      luf_f_solve_fpga(luf, x);
      luf->pp_ind = pp_ind;
      luf->pp_inv = pp_inv;
      fhv_h_solve_fpga(fhv, x);
      luf_v_solve_fpga(luf, x, work);
      memcpy(&x[1], &work[1], n * sizeof(double));
      return;
}

void scfint_ftran_fpga(SCFINT *fi, double x[])
{     /* solve system A * x = b */
      xassert(fi->valid);
      scf_a_solve_fpga(&fi->scf, x, fi->w4, fi->w5, fi->w1, fi->w2);
      return;
}

/***********************************************************************
*  luf_f_solve - solve system F * x = b
*
*  This routine solves the system F * x = b, where the matrix F is the
*  left factor of the sparse LU-factorization.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n], where n is the order of the
*  matrix F. On exit this array will contain elements of the solution
*  vector x in the same locations. */

void luf_f_solve_fpga(LUF *luf, double x[/*1+n*/])
{     int n = luf->n;
      SVA *sva = luf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int fc_ref = luf->fc_ref;
      int *fc_ptr = &sva->ptr[fc_ref-1];
      int *fc_len = &sva->len[fc_ref-1];
      int *pp_inv = luf->pp_inv;
      int j, k, ptr, end;
      double x_j;
      for (k = 1; k <= n; k++)
      {  /* k-th column of L = j-th column of F */
         j = pp_inv[k];
         /* x[j] is already computed */
         /* walk thru j-th column of matrix F and substitute x[j] into
          * other equations */
         if ((x_j = x[j]) != 0.0)
         {  for (end = (ptr = fc_ptr[j]) + fc_len[j]; ptr < end; ptr++)
               x[sv_ind[ptr]] -= sv_val[ptr] * x_j;
         }
      }
      return;
}


/***********************************************************************
*  scf_r_prod - compute product y := y + alpha * R * x
*
*  This routine computes the product y := y + alpha * R * x, where
*  x is a n0-vector, alpha is a scalar, y is a nn-vector.
*
*  Since matrix R is available by rows, the product components are
*  computed as inner products:
*
*     y[i] = y[i] + alpha * (i-th row of R) * x
*
*  for i = 1, 2, ..., nn. */

void scf_r_prod_fpga(SCF *scf, double y[/*1+nn*/], double a, const double
      x[/*1+n0*/])
{     int nn = scf->nn;
      SVA *sva = scf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int rr_ref = scf->rr_ref;
      int *rr_ptr = &sva->ptr[rr_ref-1];
      int *rr_len = &sva->len[rr_ref-1];
      int i, ptr, end;
      double t;
      for (i = 1; i <= nn; i++)
      {  /* t := (i-th row of R) * x */
         t = 0.0;
         for (end = (ptr = rr_ptr[i]) + rr_len[i]; ptr < end; ptr++)
            t += sv_val[ptr] * x[sv_ind[ptr]];
         /* y[i] := y[i] + alpha * t */
         y[i] += a * t;
      }
      return;
}


/***********************************************************************
*  ifu_a_solve - solve system A * x = b
*
*  This routine solves the system A * x = b, where the matrix A is
*  specified by its IFU-factorization.
*
*  Using the main equality F * A = U we have:
*
*     A * x = b  =>  F * A * x = F * b  =>  U * x = F * b  =>
*
*     x = inv(U) * F * b.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n], where n is the order of the
*  matrix A. On exit this array will contain elements of the solution
*  vector x in the same locations.
*
*  The working array w should have at least 1+n elements (0-th element
*  is not used). */

void ifu_a_solve_fpga(IFU *ifu, double x[/*1+n*/], double w[/*1+n*/])
{     /* non-optimized version */
      int n_max = ifu->n_max;
      int n = ifu->n;
      double *f_ = ifu->f;
      double *u_ = ifu->u;
      int i, j;
      double t;
#     define f(i,j) f_[(i)*n_max+(j)]
#     define u(i,j) u_[(i)*n_max+(j)]
      xassert(0 <= n && n <= n_max);
      /* adjust indexing */
      x++, w++;
      /* y := F * b */
      memcpy(w, x, n * sizeof(double));
      for (i = 0; i < n; i++)
      {  /* y[i] := (i-th row of F) * b */
         t = 0.0;
         for (j = 0; j < n; j++)
            t += f(i,j) * w[j];
         x[i] = t;
      }
      /* x := inv(U) * y */
      for (i = n-1; i >= 0; i--)
      {  t = x[i];
         for (j = i+1; j < n; j++)
            t -= u(i,j) * x[j];
         x[i] = t / u(i,i);
      }
#     undef f
#     undef u
      return;
}

/***********************************************************************
*  scf_s_prod - compute product y := y + alpha * S * x
*
*  This routine computes the product y := y + alpha * S * x, where
*  x is a nn-vector, alpha is a scalar, y is a n0 vector.
*
*  Since matrix S is available by columns, the product is computed as
*  a linear combination:
*
*     y := y + alpha * (S[1] * x[1] + ... + S[nn] * x[nn]),
*
*  where S[j] is j-th column of S. */

void scf_s_prod_fpga(SCF *scf, double y[/*1+n0*/], double a, const double
      x[/*1+nn*/])
{     int nn = scf->nn;
      SVA *sva = scf->sva;
      int *sv_ind = sva->ind;
      double *sv_val = sva->val;
      int ss_ref = scf->ss_ref;
      int *ss_ptr = &sva->ptr[ss_ref-1];
      int *ss_len = &sva->len[ss_ref-1];
      int j, ptr, end;
      double t;
      for (j = 1; j <= nn; j++)
      {  if (x[j] == 0.0)
            continue;
         /* y := y + alpha * S[j] * x[j] */
         t = a * x[j];
         for (end = (ptr = ss_ptr[j]) + ss_len[j]; ptr < end; ptr++)
            y[sv_ind[ptr]] += sv_val[ptr] * t;
      }
      return;
}

/***********************************************************************
*  scf_a_solve - solve system A * x = b
*
*  This routine solves the system A * x = b, where A is the current
*  matrix.
*
*  On entry the array x should contain elements of the right-hand side
*  vector b in locations x[1], ..., x[n], where n is the order of the
*  matrix A. On exit the array x will contain elements of the solution
*  vector in the same locations.
*
*  For details see the program documentation. */

void scf_a_solve_fpga(SCF *scf, double x[/*1+n*/],
      double w[/*1+n0+nn*/], double work1[/*1+max(n0,nn)*/],
      double work2[/*1+n*/], double work3[/*1+n*/])
{     int n = scf->n;
      int n0 = scf->n0;
      int nn = scf->nn;
      int *pp_ind = scf->pp_ind;
      int *qq_inv = scf->qq_inv;
      int i, ii;
      /* (u1, u2) := inv(P) * (b, 0) */
      for (ii = 1; ii <= n0+nn; ii++)
      {  i = pp_ind[ii];
#if 1 /* FIXME: currently P = I */
         xassert(i == ii);
#endif
         w[ii] = (i <= n ? x[i] : 0.0);
      }
      /* v1 := inv(R0) * u1 */
      scf_r0_solve_fpga(scf, 0, &w[0]);
      /* v2 := u2 - R * v1 */
      scf_r_prod_fpga(scf, &w[n0], -1.0, &w[0]);
      /* w2 := inv(C) * v2 */
      ifu_a_solve_fpga(&scf->ifu, &w[n0], work1);
      /* w1 := inv(S0) * (v1 - S * w2) */
      scf_s_prod_fpga(scf, &w[0], -1.0, &w[n0]);
      scf_s0_solve_fpga(scf, 0, &w[0], work1, work2, work3);
      /* (x, x~) := inv(Q) * (w1, w2); x~ is not needed */
      for (i = 1; i <= n; i++)
         x[i] = w[qq_inv[i]];
      return;
}

void bfd_ftran_fpga(BFD *bfd, double x[])
{     /* perform forward transformation (solve system B * x = b) */
      //xassert(bfd->valid);
      switch (bfd->type)
      {  case 1:
            fhvint_ftran_fpga(bfd->u.fhvi, x);
            break;
         case 2:
            scfint_ftran_fpga(bfd->u.scfi, x);
            break;
         default:
            xassert(bfd != bfd);
      }
      return;
}


/***********************************************************************
*  spx_eval_beta - compute current values of basic variables
*
*  This routine computes vector beta = (beta[i]) of current values of
*  basic variables xB = (xB[i]). (Factorization of the current basis
*  matrix should be valid.)
*
*  First the routine computes a modified vector of right-hand sides:
*
*                         n-m
*     y = b - N * f = b - sum N[j] * f[j],
*                         j=1
*
*  where b = (b[i]) is the original vector of right-hand sides, N is
*  a matrix composed from columns of the original constraint matrix A,
*  which (columns) correspond to non-basic variables, f = (f[j]) is the
*  vector of active bounds of non-basic variables xN = (xN[j]),
*  N[j] = A[k] is a column of matrix A corresponding to non-basic
*  variable xN[j] = x[k], f[j] is current active bound lN[j] = l[k] or
*  uN[j] = u[k] of non-basic variable xN[j] = x[k]. The matrix-vector
*  product N * f is computed as a linear combination of columns of N,
*  so if f[j] = 0, column N[j] can be skipped.
*
*  Then the routine performs FTRAN to compute the vector beta:
*
*     beta = inv(B) * y.
*
*  On exit the routine stores components of the vector beta to array
*  locations beta[1], ..., beta[m]. */

void spx_eval_beta_fpga(SPXLP *lp, double beta[/*1+m*/])
{     int m = lp->m;
      int n = lp->n;
      int *A_ptr = lp->A_ptr;
      int *A_ind = lp->A_ind;
      double *A_val = lp->A_val;
      double *b = lp->b;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      char *flag = lp->flag;
      int j, k, ptr, end;
      double fj, *y;
      /* compute y = b - N * xN */
      /* y := b */
      y = beta;

      memcpy(&y[1], &b[1], m * sizeof(double));


      /* y := y - N * f */
      for (j = 1; j <= n-m; j++)
      {  k = head[m+j]; /* x[k] = xN[j] */
         /* f[j] := active bound of xN[j] */
         fj = flag[j] ? u[k] : l[k];
         if (fj == 0.0 || fj == -DBL_MAX)
         {  /* either xN[j] has zero active bound or it is unbounded;
             * in the latter case its value is assumed to be zero */
            continue;
         }
         /* y := y - N[j] * f[j] */
         ptr = A_ptr[k];
         end = A_ptr[k+1];
         for (; ptr < end; ptr++)
            y[A_ind[ptr]] -= A_val[ptr] * fj;
      }
      /* compute beta = inv(B) * y */
      //xassert(lp->valid);
      bfd_ftran_fpga(lp->bfd, beta);
      return;
}


/***********************************************************************
*  spx_nt_del_col - remove column N[j] = A[k] from matrix N
*
*  This routine removes elements of column N[j] = A[k], 1 <= j <= n-m,
*  1 <= k <= n, from the row-wise representation of the matrix N. It is
*  assumed (with no check) that elements of the specified column are
*  present in the row-wise representation of N. */

void spx_nt_del_col_fpga(SPXLP *lp, SPXNT *nt, int j, int k)
{     int m = lp->m;
      int n = lp->n;
      int *A_ptr = lp->A_ptr;
      int *A_ind = lp->A_ind;
      int *NT_ptr = nt->ptr;
      int *NT_len = nt->len;
      int *NT_ind = nt->ind;
      double *NT_val = nt->val;
      int i, ptr, end, ptr1, end1;
      xassert(1 <= j && j <= n-m);
      xassert(1 <= k && k <= n);
      ptr = A_ptr[k];
      end = A_ptr[k+1];
      for (; ptr < end; ptr++)
      {  i = A_ind[ptr];
         /* find element N[i,j] = A[i,k] in i-th row of matrix N */
         ptr1 = NT_ptr[i];
         end1 = ptr1 + NT_len[i];
         for (; NT_ind[ptr1] != j; ptr1++)
            /* nop */;
         xassert(ptr1 < end1);
         /* and remove it from i-th row element list */
         NT_len[i]--;
         NT_ind[ptr1] = NT_ind[end1-1];
         NT_val[ptr1] = NT_val[end1-1];
      }
      return;
}


/***********************************************************************
*  spx_nt_add_col - add column N[j] = A[k] to matrix N
*
*  This routine adds elements of column N[j] = A[k], 1 <= j <= n-m,
*  1 <= k <= n, to the row-wise represntation of the matrix N. It is
*  assumed (with no check) that elements of the specified column are
*  missing in the row-wise represntation of N. */

void spx_nt_add_col_fpga(SPXLP *lp, SPXNT *nt, int j, int k)
{     int m = lp->m;
      int n = lp->n;
      int nnz = lp->nnz;
      int *A_ptr = lp->A_ptr;
      int *A_ind = lp->A_ind;
      double *A_val = lp->A_val;
      int *NT_ptr = nt->ptr;
      int *NT_len = nt->len;
      int *NT_ind = nt->ind;
      double *NT_val = nt->val;
      int i, ptr, end, pos;
      xassert(1 <= j && j <= n-m);
      xassert(1 <= k && k <= n);
      ptr = A_ptr[k];
      end = A_ptr[k+1];
      for (; ptr < end; ptr++)
      {  i = A_ind[ptr];
         /* add element N[i,j] = A[i,k] to i-th row of matrix N */
         pos = NT_ptr[i] + (NT_len[i]++);
         if (i < m)
            xassert(pos < NT_ptr[i+1]);
         else
            xassert(pos <= nnz);
         NT_ind[pos] = j;
         NT_val[pos] = A_val[ptr];
      }
      return;
}

/***********************************************************************
*  spx_update_nt - update matrix N for adjacent basis
*
*  This routine updates the row-wise represntation of matrix N for
*  the adjacent basis, where column N[q], 1 <= q <= n-m, is replaced by
*  column B[p], 1 <= p <= m, of the current basis matrix B. */

void spx_update_nt_fpga(SPXLP *lp, SPXNT *nt, int p, int q)
{     int m = lp->m;
      int n = lp->n;
      int *head = lp->head;
      xassert(1 <= p && p <= m);
      xassert(1 <= q && q <= n-m);
      /* remove old column N[q] corresponding to variable xN[q] */
      spx_nt_del_col_fpga(lp, nt, q, head[m+q]);
      /* add new column N[q] corresponding to variable xB[p] */
      spx_nt_add_col_fpga(lp, nt, q, head[p]);
      return;
}

#if 1 /* 21/IV-2014 */
double bfd_condest_fpga(BFD *bfd)
{     /* estimate condition of B */
      double cond;
      xassert(bfd->valid);
      /*xassert(bfd->upd_cnt == 0);*/
      cond = bfd->b_norm * bfd->i_norm;
      if (cond < 1.0)
         cond = 1.0;
      return cond;
}
#endif

/***********************************************************************
*  spx_eval_pi - compute simplex multipliers in current basis
*
*  This routine computes vector pi = (pi[i]) of simplex multipliers in
*  the current basis. (Factorization of the current basis matrix should
*  be valid.)
*
*  The vector pi is computed by performing BTRAN:
*
*     pi = inv(B') * cB,
*
*  where cB = (cB[i]) is the vector of objective coefficients at basic
*  variables xB = (xB[i]).
*
*  On exit components of vector pi are stored in the array locations
*  pi[1], ..., pi[m]. */

void spx_eval_pi_fpga(SPXLP *lp, double pi[/*1+m*/])
{     int m = lp->m;
      double *c = lp->c;
      int *head = lp->head;
      int i;
      double *cB;
      /* construct cB */
      cB = pi;
      for (i = 1; i <= m; i++)
         cB[i] = c[head[i]];
      /* compute pi = inv(B) * cB */
      bfd_btran_fpga(lp->bfd, pi);
      return;
}


/***********************************************************************
*  spx_chuzc_sel - select eligible non-basic variables
*
*  This routine selects eligible non-basic variables xN[j], whose
*  reduced costs d[j] have "wrong" sign, i.e. changing such xN[j] in
*  feasible direction improves (decreases) the objective function.
*
*  Reduced costs of non-basic variables should be placed in the array
*  locations d[1], ..., d[n-m].
*
*  Non-basic variable xN[j] is considered eligible if:
*
*     d[j] <= -eps[j] and xN[j] can increase
*
*     d[j] >= +eps[j] and xN[j] can decrease
*
*  for
*
*     eps[j] = tol + tol1 * |cN[j]|,
*
*  where cN[j] is the objective coefficient at xN[j], tol and tol1 are
*  specified tolerances.
*
*  On exit the routine stores indices j of eligible non-basic variables
*  xN[j] to the array locations list[1], ..., list[num] and returns the
*  number of such variables 0 <= num <= n-m. (If the parameter list is
*  specified as NULL, no indices are stored.) */

int spx_chuzc_sel_fpga(SPXLP *lp, const double d[/*1+n-m*/], double tol,
      double tol1, int list[/*1+n-m*/])
{     int m = lp->m;
      int n = lp->n;
      double *c = lp->c;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      char *flag = lp->flag;
      int j, k, num;
      double ck, eps;
      num = 0;
      /* walk thru list of non-basic variables */
      for (j = 1; j <= n-m; j++)
      {  k = head[m+j]; /* x[k] = xN[j] */
         if (l[k] == u[k])
         {  /* xN[j] is fixed variable; skip it */
            continue;
         }
         /* determine absolute tolerance eps[j] */
         ck = c[k];
         eps = tol + tol1 * (ck >= 0.0 ? +ck : -ck);
         /* check if xN[j] is eligible */
         if (d[j] <= -eps)
         {  /* xN[j] should be able to increase */
            if (flag[j])
            {  /* but its upper bound is active */
               continue;
            }
         }
         else if (d[j] >= +eps)
         {  /* xN[j] should be able to decrease */
            if (!flag[j] && l[k] != -DBL_MAX)
            {  /* but its lower bound is active */
               continue;
            }
         }
         else /* -eps < d[j] < +eps */
         {  /* xN[j] does not affect the objective function within the
             * specified tolerance */
            continue;
         }
         /* xN[j] is eligible non-basic variable */
         num++;
         if (list != NULL)
            list[num] = j;
      }
      return num;
}


#if 1 /* 30/III-2016 */
void spx_update_beta_s_fpga(SPXLP *lp, double beta[/*1+m*/], int p,
      int p_flag, int q, const FVS *tcol)
{     /* sparse version of spx_update_beta */
      int m = lp->m;
      int n = lp->n;
      double *l = lp->l;
      double *u = lp->u;
      int *head = lp->head;
      char *flag = lp->flag;
      int nnz = tcol->nnz;
      int *ind = tcol->ind;
      double *vec = tcol->vec;
      int i, k;
      double delta_p, delta_q;
      xassert(tcol->n == m);
      if (p < 0)
      {  /* special case: xN[q] goes to its opposite bound */
#if 0 /* 11/VI-2017 */
         /* FIXME: not tested yet */
         xassert(0);
#endif
         xassert(1 <= q && q <= n-m);
         /* xN[q] should be double-bounded variable */
         k = head[m+q]; /* x[k] = xN[q] */
         xassert(l[k] != -DBL_MAX && u[k] != +DBL_MAX && l[k] != u[k]);
         /* determine delta xN[q] */
         if (flag[q])
         {  /* xN[q] goes from its upper bound to its lower bound */
            delta_q = l[k] - u[k];
         }
         else
         {  /* xN[q] goes from its lower bound to its upper bound */
            delta_q = u[k] - l[k];
         }
      }
      else
      {  /* xB[p] leaves the basis, xN[q] enters the basis */
         xassert(1 <= p && p <= m);
         xassert(1 <= q && q <= n-m);
         /* determine delta xB[p] */
         k = head[p]; /* x[k] = xB[p] */
         if (p_flag)
         {  /* xB[p] goes to its upper bound */
            xassert(l[k] != u[k] && u[k] != +DBL_MAX);
            delta_p = u[k] - beta[p];
         }
         else if (l[k] == -DBL_MAX)
         {  /* unbounded xB[p] becomes non-basic (unusual case) */
            xassert(u[k] == +DBL_MAX);
            delta_p = 0.0 - beta[p];
         }
         else
         {  /* xB[p] goes to its lower bound or becomes fixed */
            delta_p = l[k] - beta[p];
         }
         /* determine delta xN[q] */
         delta_q = delta_p / vec[p];
         /* compute new beta[p], which is the value of xN[q] in the
          * adjacent basis */
         k = head[m+q]; /* x[k] = xN[q] */
         if (flag[q])
         {  /* xN[q] has its upper bound active */
            xassert(l[k] != u[k] && u[k] != +DBL_MAX);
            beta[p] = u[k] + delta_q;
         }
         else if (l[k] == -DBL_MAX)
         {  /* xN[q] is non-basic unbounded variable */
            xassert(u[k] == +DBL_MAX);
            beta[p] = 0.0 + delta_q;
         }
         else
         {  /* xN[q] has its lower bound active or is fixed (latter
             * case is unusual) */
            beta[p] = l[k] + delta_q;
         }
      }
      /* compute new beta[i] for all i != p */
      for (k = 1; k <= nnz; k++)
      {  i = ind[k];
         if (i != p)
            beta[i] += vec[i] * delta_q;
      }
      return;
}
#endif


/***********************************************************************
*  spx_primal - driver to the primal simplex method
*
*  This routine is a driver to the two-phase primal simplex method.
*
*  On exit this routine returns one of the following codes:
*
*  0  LP instance has been successfully solved.
*
*  GLP_EITLIM
*     Iteration limit has been exhausted.
*
*  GLP_ETMLIM
*     Time limit has been exhausted.
*
*  GLP_EFAIL
*     The solver failed to solve LP instance. */

int primal_simplex_fpga(struct csa *csa, void (*funcptr)(void *csa))
{     /* primal simplex method main logic routine */
      SPXLP *lp = csa->lp;
      int m = lp->m;
      int n = lp->n;
      double *c = lp->c;
      int *head = lp->head;
      SPXAT *at = csa->at;
      SPXNT *nt = csa->nt;
      double *beta = csa->beta;
      double *d = csa->d;
      SPXSE *se = csa->se;
      int *list = csa->list;

      double *pi = csa->work.vec;
      double *rho = csa->work.vec;

      int msg_lev = csa->msg_lev;
      double tol_bnd = csa->tol_bnd;
      double tol_bnd1 = csa->tol_bnd1;
      double tol_dj = csa->tol_dj;
      double tol_dj1 = csa->tol_dj1;


      int perturb = -1;
      /* -1 = perturbation is not used, but enabled
       *  0 = perturbation is not used and disabled
       * +1 = perturbation is being used */
      int j, refct, ret;

loop: /* main loop starts here */

      /* compute values of basic variables beta = (beta[i]) */
      if (!csa->beta_st)
      {  spx_eval_beta_fpga(lp, beta);
         csa->beta_st = 1; /* just computed */
         /* determine the search phase, if not determined yet */
         if (!csa->phase)
         {  if (set_penalty_fpga(csa, 0.97 * tol_bnd, 0.97 * tol_bnd1))
            {  /* current basic solution is primal infeasible */
               /* start to minimize the sum of infeasibilities */
               csa->phase = 1;
            }
            else
            {  /* current basic solution is primal feasible */
               /* start to minimize the original objective function */
               csa->phase = 2;
               memcpy(c, csa->orig_c, (1+n) * sizeof(double));
            }
            /* working objective coefficients have been changed, so
             * invalidate reduced costs */
            csa->d_st = 0;
         }

         /* make sure that the current basic solution remains primal
          * feasible (or pseudo-feasible on phase I) */
         if (perturb <= 0)
         {  if (check_feas_fpga(csa, csa->phase, tol_bnd, tol_bnd1))
            {  /* excessive bound violations due to round-off errors */
               if (perturb < 0)
               {  if (msg_lev >= GLP_MSG_ALL)
                     xprintf("Perturbing LP to avoid instability [%d].."
                        ".\n", csa->it_cnt);
                  perturb = 1;
                  goto loop;
               }


               if (msg_lev >= GLP_MSG_ERR)
                  xprintf("Warning: numerical instability (primal simpl"
                     "ex, phase %s)\n", csa->phase == 1 ? "I" : "II");
               /* restart the search */
               lp->valid = 0;
               csa->phase = 0;
               goto loop;
            }


            if (csa->phase == 1)
            {  int i, cnt;
               for (i = 1; i <= m; i++)
                  csa->tcol.ind[i] = i;

               cnt = adjust_penalty_fpga(csa, m, csa->tcol.ind,
                  0.99 * tol_bnd, 0.99 * tol_bnd1);

               if (cnt)
               {  /*xprintf("*** cnt = %d\n", cnt);*/
                  csa->d_st = 0;
               }
            }
         }
         else
         {  /* FIXME */
            play_bounds_fpga(csa, 1);
         }
      }


      /* compute reduced costs of non-basic variables d = (d[j]) */
      if (!csa->d_st)
      {  spx_eval_pi_fpga(lp, pi);
         for (j = 1; j <= n-m; j++)
            d[j] = spx_eval_dj_fpga(lp, pi, j);
         csa->d_st = 1; /* just computed */
      }

      int a = 123434;
      void *tempCSA = &a;
      funcptr(tempCSA);
      printf("updated value of a: %d\n", a);


      /* reset the reference space, if necessary */
      if (se != NULL && !se->valid)
         spx_reset_refsp_fpga(lp, se), refct = 1000;


      /* select eligible non-basic variables */
      switch (csa->phase)
      {  case 1:
            csa->num = spx_chuzc_sel_fpga(lp, d, 1e-8, 0.0, list);
            break;
         case 2:
            csa->num = spx_chuzc_sel_fpga(lp, d, tol_dj, tol_dj1, list);
            break;
         default:
            xassert(csa != csa);
      }


      /* check for optimality */
      if (csa->num == 0)
      {  if (perturb > 0 && csa->phase == 2)
         {  /* remove perturbation */
            remove_perturb_fpga(csa);
            perturb = 0;
         }
         if (csa->beta_st != 1)
            csa->beta_st = 0;
         if (csa->d_st != 1)
            csa->d_st = 0;
         if (!(csa->beta_st && csa->d_st))
            goto loop;


         switch (csa->phase)
         {  case 1:
               /* check for primal feasibility */
               if (!check_feas_fpga(csa, 2, tol_bnd, tol_bnd1))
               {  /* feasible solution found; switch to phase II */
                  memcpy(c, csa->orig_c, (1+n) * sizeof(double));
                  csa->phase = 2;
                  csa->d_st = 0;
                  goto loop;
               }

               /* no feasible solution exists */

               if (msg_lev >= GLP_MSG_ALL)
                  xprintf("LP HAS NO PRIMAL FEASIBLE SOLUTION\n");
               csa->p_stat = GLP_NOFEAS;
               csa->d_stat = GLP_UNDEF; /* will be set below */
               ret = 0;
               goto fini;
            case 2:
               /* optimal solution found */
               if (msg_lev >= GLP_MSG_ALL)
                  xprintf("OPTIMAL LP SOLUTION FOUND\n");
               csa->p_stat = csa->d_stat = GLP_FEAS;
               ret = 0;
               goto fini;
            default:
               xassert(csa != csa);
         }
      }
      /* choose xN[q] and xB[p] */

      ret = choose_pivot_fpga(csa);
      if (ret < 0)
      {  lp->valid = 0;
         goto loop;
      }


      if (ret == 0)
         csa->ns_cnt++;
      else
         csa->ls_cnt++;

      /* check for unboundedness */
      if (csa->p == 0)
      {  if (perturb > 0)
         {  /* remove perturbation */
            remove_perturb_fpga(csa);
            perturb = 0;
         }
         if (csa->beta_st != 1)
            csa->beta_st = 0;
         if (csa->d_st != 1)
            csa->d_st = 0;
         if (!(csa->beta_st && csa->d_st))
            goto loop;


         switch (csa->phase)
         {  case 1:
               /* this should never happen */
               if (msg_lev >= GLP_MSG_ERR)
                  xprintf("Error: primal simplex failed\n");
               csa->p_stat = csa->d_stat = GLP_UNDEF;
               ret = GLP_EFAIL;
               goto fini;

            case 2:
               /* primal unboundedness detected */
               if (msg_lev >= GLP_MSG_ALL)
                  xprintf("LP HAS UNBOUNDED PRIMAL SOLUTION\n");
               csa->p_stat = GLP_FEAS;
               csa->d_stat = GLP_NOFEAS;
               ret = 0;
               goto fini;

            default:
               xassert(csa != csa);
         }
      }

      /* update values of basic variables for adjacent basis */

      spx_update_beta_s_fpga(lp, beta, csa->p, csa->p_flag, csa->q,
         &csa->tcol);
      csa->beta_st = 2;
      /* p < 0 means that xN[q] jumps to its opposite bound */
      if (csa->p < 0)
         goto skip;
      /* xN[q] enters and xB[p] leaves the basis */
      /* compute p-th row of inv(B) */
      spx_eval_rho(lp, csa->p, rho);
      /* compute p-th (pivot) row of the simplex table */

      if (at != NULL)
         spx_eval_trow1_fpga(lp, at, rho, csa->trow.vec);
      else
         spx_nt_prod_fpga(lp, nt, csa->trow.vec, 1, -1.0, rho);


      fvs_gather_vec_fpga(&csa->trow, DBL_EPSILON);

      /* FIXME: tcol[p] and trow[q] should be close to each other */

      if (csa->trow.vec[csa->q] == 0.0)
      {  if (msg_lev >= GLP_MSG_ERR)
            xprintf("Error: trow[q] = 0.0\n");
         csa->p_stat = csa->d_stat = GLP_UNDEF;
         ret = GLP_EFAIL;
         goto fini;
      }


      /* update reduced costs of non-basic variables for adjacent
       * basis */
      /* dual solution may be invalidated due to long step */
      if (csa->d_st)
          if (spx_update_d_s_fpga(lp, d, csa->p, csa->q, &csa->trow, &csa->tcol)
             <= 1e-9)
          {  /* successful updating */
             csa->d_st = 2;
             if (csa->phase == 1)
             {  /* adjust reduced cost of xN[q] in adjacent basis, since
                 * its penalty coefficient changes (see below) */
                d[csa->q] -= c[head[csa->p]];
             }
          }
          else
          {  /* new reduced costs are inaccurate */
             csa->d_st = 0;
          }


      if (csa->phase == 1)
      {  /* xB[p] leaves the basis replacing xN[q], so set its penalty
          * coefficient to zero */
         c[head[csa->p]] = 0.0;
      }

      /* update steepest edge weights for adjacent basis, if used */
      if (se != NULL)
      {  if (refct > 0)

    	  /* FIXME: spx_update_gamma_s */
         {  if (spx_update_gamma_fpga(lp, se, csa->p, csa->q, csa->trow.vec,
               csa->tcol.vec) <= 1e-3)
            {  /* successful updating */
               refct--;
            }
            else
            {  /* new weights are inaccurate; reset reference space */
               se->valid = 0;
            }
         }
         else
         {  /* too many updates; reset reference space */
            se->valid = 0;
         }
      }


      /* update matrix N for adjacent basis, if used */
      if (nt != NULL)
         spx_update_nt_fpga(lp, nt, csa->p, csa->q);
skip: /* change current basis header to adjacent one */
      spx_change_basis_fpga(lp, csa->p, csa->p_flag, csa->q);
      /* and update factorization of the basis matrix */
      if (csa->p > 0)
         spx_update_invb_fpga(lp, csa->p, head[csa->p]);
#if 1
      if (perturb <= 0)
      {  if (csa->phase == 1)
         {  int cnt;
            /* adjust penalty function coefficients */
            cnt = adjust_penalty_fpga(csa, csa->tcol.nnz, csa->tcol.ind,
               0.99 * tol_bnd, 0.99 * tol_bnd1);
            if (cnt)
            {  /* some coefficients were changed, so invalidate reduced
                * costs of non-basic variables */
               /*xprintf("... cnt = %d\n", cnt);*/
               csa->d_st = 0;
            }
         }
      }
      else
      {  /* FIXME */
         play_bounds_fpga(csa, 0);
      }
#endif
      /* simplex iteration complete */
      csa->it_cnt++;

      goto loop;


fini: /* restore original objective function */
      memcpy(c, csa->orig_c, (1+n) * sizeof(double));
      /* compute reduced costs of non-basic variables and determine
       * solution dual status, if necessary */
      if (csa->p_stat != GLP_UNDEF && csa->d_stat == GLP_UNDEF)
      {  xassert(ret != GLP_EFAIL);
         spx_eval_pi_fpga(lp, pi);
         for (j = 1; j <= n-m; j++)
            d[j] = spx_eval_dj_fpga(lp, pi, j);
         csa->num = spx_chuzc_sel_fpga(lp, d, tol_dj, tol_dj1, NULL);
         csa->d_stat = (csa->num == 0 ? GLP_FEAS : GLP_INFEAS);
      }
      return ret;
}


int primal_simplex_fpga_original(struct csa *csa)
{     /* primal simplex method main logic routine */
      SPXLP *lp = csa->lp;
      int m = lp->m;
      int n = lp->n;
      double *c = lp->c;
      int *head = lp->head;
      SPXAT *at = csa->at;
      SPXNT *nt = csa->nt;
      double *beta = csa->beta;
      double *d = csa->d;
      SPXSE *se = csa->se;
      int *list = csa->list;
#if 0 /* 11/VI-2017 */
      double *tcol = csa->tcol;
      double *trow = csa->trow;
#endif
#if 0 /* 09/VII-2017 */
      double *pi = csa->work;
      double *rho = csa->work;
#else
      double *pi = csa->work.vec;
      double *rho = csa->work.vec;
#endif
      int msg_lev = csa->msg_lev;
      double tol_bnd = csa->tol_bnd;
      double tol_bnd1 = csa->tol_bnd1;
      double tol_dj = csa->tol_dj;
      double tol_dj1 = csa->tol_dj1;
      int perturb = -1;
      /* -1 = perturbation is not used, but enabled
       *  0 = perturbation is not used and disabled
       * +1 = perturbation is being used */
      int j, refct, ret;
loop: /* main loop starts here */
      /* compute factorization of the basis matrix */
      if (!lp->valid)
      {  double cond;
         ret = spx_factorize_fpga(lp);
         csa->inv_cnt++;
         if (ret != 0)
         {  if (msg_lev >= GLP_MSG_ERR)
               xprintf("Error: unable to factorize the basis matrix (%d"
                  ")\n", ret);
            csa->p_stat = csa->d_stat = GLP_UNDEF;
            ret = GLP_EFAIL;
            goto fini;
         }
         /* check condition of the basis matrix */
         cond = bfd_condest_fpga(lp->bfd);
         if (cond > 1.0 / DBL_EPSILON)
         {  if (msg_lev >= GLP_MSG_ERR)
               xprintf("Error: basis matrix is singular to working prec"
                  "ision (cond = %.3g)\n", cond);
            csa->p_stat = csa->d_stat = GLP_UNDEF;
            ret = GLP_EFAIL;
            goto fini;
         }
         if (cond > 0.001 / DBL_EPSILON)
         {  if (msg_lev >= GLP_MSG_ERR)
               xprintf("Warning: basis matrix is ill-conditioned (cond "
                  "= %.3g)\n", cond);
         }
         /* invalidate basic solution components */
         csa->beta_st = csa->d_st = 0;
      }
      /* compute values of basic variables beta = (beta[i]) */
      if (!csa->beta_st)
      {  spx_eval_beta_fpga(lp, beta);
         csa->beta_st = 1; /* just computed */
         /* determine the search phase, if not determined yet */
         if (!csa->phase)
         {  if (set_penalty_fpga(csa, 0.97 * tol_bnd, 0.97 * tol_bnd1))
            {  /* current basic solution is primal infeasible */
               /* start to minimize the sum of infeasibilities */
               csa->phase = 1;
            }
            else
            {  /* current basic solution is primal feasible */
               /* start to minimize the original objective function */
               csa->phase = 2;
               memcpy(c, csa->orig_c, (1+n) * sizeof(double));
            }
            /* working objective coefficients have been changed, so
             * invalidate reduced costs */
            csa->d_st = 0;
         }
         /* make sure that the current basic solution remains primal
          * feasible (or pseudo-feasible on phase I) */
         if (perturb <= 0)
         {  if (check_feas_fpga(csa, csa->phase, tol_bnd, tol_bnd1))
            {  /* excessive bound violations due to round-off errors */
#if 1 /* 01/VII-2017 */
               if (perturb < 0)
               {  if (msg_lev >= GLP_MSG_ALL)
                     xprintf("Perturbing LP to avoid instability [%d].."
                        ".\n", csa->it_cnt);
                  perturb = 1;
                  goto loop;
               }
#endif
               if (msg_lev >= GLP_MSG_ERR)
                  xprintf("Warning: numerical instability (primal simpl"
                     "ex, phase %s)\n", csa->phase == 1 ? "I" : "II");
               /* restart the search */
               lp->valid = 0;
               csa->phase = 0;
               goto loop;
            }
            if (csa->phase == 1)
            {  int i, cnt;
               for (i = 1; i <= m; i++)
                  csa->tcol.ind[i] = i;
               cnt = adjust_penalty_fpga(csa, m, csa->tcol.ind,
                  0.99 * tol_bnd, 0.99 * tol_bnd1);
               if (cnt)
               {  /*xprintf("*** cnt = %d\n", cnt);*/
                  csa->d_st = 0;
               }
            }
         }
         else
         {  /* FIXME */
            play_bounds_fpga(csa, 1);
         }
      }
      /* at this point the search phase is determined */
      xassert(csa->phase == 1 || csa->phase == 2);
      /* compute reduced costs of non-basic variables d = (d[j]) */
      if (!csa->d_st)
      {  spx_eval_pi_fpga(lp, pi);
         for (j = 1; j <= n-m; j++)
            d[j] = spx_eval_dj_fpga(lp, pi, j);
         csa->d_st = 1; /* just computed */
      }
      /* reset the reference space, if necessary */
      if (se != NULL && !se->valid)
         spx_reset_refsp_fpga(lp, se), refct = 1000;
      /* at this point the basis factorization and all basic solution
       * components are valid */
      xassert(lp->valid && csa->beta_st && csa->d_st);
#if CHECK_ACCURACY
      /* check accuracy of current basic solution components (only for
       * debugging) */
      check_accuracy(csa);
#endif
      /* check if the iteration limit has been exhausted */
      if (csa->it_cnt - csa->it_beg >= csa->it_lim)
      {  if (perturb > 0)
         {  /* remove perturbation */
            remove_perturb_fpga(csa);
            perturb = 0;
         }
         if (csa->beta_st != 1)
            csa->beta_st = 0;
         if (csa->d_st != 1)
            csa->d_st = 0;
         if (!(csa->beta_st && csa->d_st))
            goto loop;
         display_fpga(csa, 1);
         if (msg_lev >= GLP_MSG_ALL)
            xprintf("ITERATION LIMIT EXCEEDED; SEARCH TERMINATED\n");
         csa->p_stat = (csa->phase == 2 ? GLP_FEAS : GLP_INFEAS);
         csa->d_stat = GLP_UNDEF; /* will be set below */
         ret = GLP_EITLIM;
         goto fini;
      }
      /* check if the time limit has been exhausted */
      if (1000.0 * xdifftime(xtime(), csa->tm_beg) >= csa->tm_lim)
      {  if (perturb > 0)
         {  /* remove perturbation */
            remove_perturb_fpga(csa);
            perturb = 0;
         }
         if (csa->beta_st != 1)
            csa->beta_st = 0;
         if (csa->d_st != 1)
            csa->d_st = 0;
         if (!(csa->beta_st && csa->d_st))
            goto loop;
         display_fpga(csa, 1);
         if (msg_lev >= GLP_MSG_ALL)
            xprintf("TIME LIMIT EXCEEDED; SEARCH TERMINATED\n");
         csa->p_stat = (csa->phase == 2 ? GLP_FEAS : GLP_INFEAS);
         csa->d_stat = GLP_UNDEF; /* will be set below */
         ret = GLP_ETMLIM;
         goto fini;
      }
      /* display the search progress */
      display_fpga(csa, 0);
      /* select eligible non-basic variables */
      switch (csa->phase)
      {  case 1:
            csa->num = spx_chuzc_sel_fpga(lp, d, 1e-8, 0.0, list);
            break;
         case 2:
            csa->num = spx_chuzc_sel_fpga(lp, d, tol_dj, tol_dj1, list);
            break;
         default:
            xassert(csa != csa);
      }
      /* check for optimality */
      if (csa->num == 0)
      {  if (perturb > 0 && csa->phase == 2)
         {  /* remove perturbation */
            remove_perturb_fpga(csa);
            perturb = 0;
         }
         if (csa->beta_st != 1)
            csa->beta_st = 0;
         if (csa->d_st != 1)
            csa->d_st = 0;
         if (!(csa->beta_st && csa->d_st))
            goto loop;
         /* current basis is optimal */
         display_fpga(csa, 1);
         switch (csa->phase)
         {  case 1:
               /* check for primal feasibility */
               if (!check_feas_fpga(csa, 2, tol_bnd, tol_bnd1))
               {  /* feasible solution found; switch to phase II */
                  memcpy(c, csa->orig_c, (1+n) * sizeof(double));
                  csa->phase = 2;
                  csa->d_st = 0;
                  goto loop;
               }
               /* no feasible solution exists */
#if 1 /* 09/VII-2017 */
               /* FIXME: remove perturbation */
#endif
               if (msg_lev >= GLP_MSG_ALL)
                  xprintf("LP HAS NO PRIMAL FEASIBLE SOLUTION\n");
               csa->p_stat = GLP_NOFEAS;
               csa->d_stat = GLP_UNDEF; /* will be set below */
               ret = 0;
               goto fini;
            case 2:
               /* optimal solution found */
               if (msg_lev >= GLP_MSG_ALL)
                  xprintf("OPTIMAL LP SOLUTION FOUND\n");
               csa->p_stat = csa->d_stat = GLP_FEAS;
               ret = 0;
               goto fini;
            default:
               xassert(csa != csa);
         }
      }
      /* choose xN[q] and xB[p] */
#if 0 /* 23/VI-2017 */
#if 0 /* 17/III-2016 */
      choose_pivot_fpga(csa);
#else
      if (choose_pivot_fpga(csa) < 0)
      {  lp->valid = 0;
         goto loop;
      }
#endif
#else
      ret = choose_pivot_fpga(csa);
      if (ret < 0)
      {  lp->valid = 0;
         goto loop;
      }
      if (ret == 0)
         csa->ns_cnt++;
      else
         csa->ls_cnt++;
#endif
      /* check for unboundedness */
      if (csa->p == 0)
      {  if (perturb > 0)
         {  /* remove perturbation */
            remove_perturb_fpga(csa);
            perturb = 0;
         }
         if (csa->beta_st != 1)
            csa->beta_st = 0;
         if (csa->d_st != 1)
            csa->d_st = 0;
         if (!(csa->beta_st && csa->d_st))
            goto loop;
         display_fpga(csa, 1);
         switch (csa->phase)
         {  case 1:
               /* this should never happen */
               if (msg_lev >= GLP_MSG_ERR)
                  xprintf("Error: primal simplex failed\n");
               csa->p_stat = csa->d_stat = GLP_UNDEF;
               ret = GLP_EFAIL;
               goto fini;
            case 2:
               /* primal unboundedness detected */
               if (msg_lev >= GLP_MSG_ALL)
                  xprintf("LP HAS UNBOUNDED PRIMAL SOLUTION\n");
               csa->p_stat = GLP_FEAS;
               csa->d_stat = GLP_NOFEAS;
               ret = 0;
               goto fini;
            default:
               xassert(csa != csa);
         }
      }
#if 1 /* 01/VII-2017 */
      /* check for stalling */
      if (csa->p > 0)
      {  int k;
         xassert(1 <= csa->p && csa->p <= m);
         k = head[csa->p]; /* x[k] = xB[p] */
         if (lp->l[k] != lp->u[k])
         {  if (csa->p_flag)
            {  /* xB[p] goes to its upper bound */
               xassert(lp->u[k] != +DBL_MAX);
               if (fabs(beta[csa->p] - lp->u[k]) >= 1e-6)
               {  csa->degen = 0;
                  goto skip1;
               }
            }
            else if (lp->l[k] == -DBL_MAX)
            {  /* unusual case */
               goto skip1;
            }
            else
            {  /* xB[p] goes to its lower bound */
               xassert(lp->l[k] != -DBL_MAX);
               if (fabs(beta[csa->p] - lp->l[k]) >= 1e-6)
               {  csa->degen = 0;
                  goto skip1;
               }
            }
            /* degenerate iteration has been detected */
            csa->degen++;
            if (perturb < 0 && csa->degen >= 200)
            {  if (msg_lev >= GLP_MSG_ALL)
                  xprintf("Perturbing LP to avoid stalling [%d]...\n",
                     csa->it_cnt);
               perturb = 1;
            }
skip1:      ;
         }
      }
#endif
      /* update values of basic variables for adjacent basis */
#if 0 /* 11/VI-2017 */
      spx_update_beta(lp, beta, csa->p, csa->p_flag, csa->q, tcol);
#else
      spx_update_beta_s_fpga(lp, beta, csa->p, csa->p_flag, csa->q,
         &csa->tcol);
#endif
      csa->beta_st = 2;
      /* p < 0 means that xN[q] jumps to its opposite bound */
      if (csa->p < 0)
         goto skip;
      /* xN[q] enters and xB[p] leaves the basis */
      /* compute p-th row of inv(B) */
      spx_eval_rho(lp, csa->p, rho);
      /* compute p-th (pivot) row of the simplex table */
#if 0 /* 11/VI-2017 */
      if (at != NULL)
         spx_eval_trow1_fpga(lp, at, rho, trow);
      else
         spx_nt_prod_fpga(lp, nt, trow, 1, -1.0, rho);
#else
      if (at != NULL)
         spx_eval_trow1_fpga(lp, at, rho, csa->trow.vec);
      else
         spx_nt_prod_fpga(lp, nt, csa->trow.vec, 1, -1.0, rho);
      fvs_gather_vec_fpga(&csa->trow, DBL_EPSILON);
#endif
      /* FIXME: tcol[p] and trow[q] should be close to each other */
#if 0 /* 26/V-2017 by cmatraki */
      xassert(trow[csa->q] != 0.0);
#else
      if (csa->trow.vec[csa->q] == 0.0)
      {  if (msg_lev >= GLP_MSG_ERR)
            xprintf("Error: trow[q] = 0.0\n");
         csa->p_stat = csa->d_stat = GLP_UNDEF;
         ret = GLP_EFAIL;
         goto fini;
      }
#endif
      /* update reduced costs of non-basic variables for adjacent
       * basis */
#if 1 /* 23/VI-2017 */
      /* dual solution may be invalidated due to long step */
      if (csa->d_st)
#endif
#if 0 /* 11/VI-2017 */
      if (spx_update_d(lp, d, csa->p, csa->q, trow, tcol) <= 1e-9)
#else
      if (spx_update_d_s_fpga(lp, d, csa->p, csa->q, &csa->trow, &csa->tcol)
         <= 1e-9)
#endif
      {  /* successful updating */
         csa->d_st = 2;
         if (csa->phase == 1)
         {  /* adjust reduced cost of xN[q] in adjacent basis, since
             * its penalty coefficient changes (see below) */
            d[csa->q] -= c[head[csa->p]];
         }
      }
      else
      {  /* new reduced costs are inaccurate */
         csa->d_st = 0;
      }
      if (csa->phase == 1)
      {  /* xB[p] leaves the basis replacing xN[q], so set its penalty
          * coefficient to zero */
         c[head[csa->p]] = 0.0;
      }
      /* update steepest edge weights for adjacent basis, if used */
      if (se != NULL)
      {  if (refct > 0)
#if 0 /* 11/VI-2017 */
         {  if (spx_update_gamma_fpga(lp, se, csa->p, csa->q, trow, tcol)
               <= 1e-3)
#else /* FIXME: spx_update_gamma_s */
         {  if (spx_update_gamma_fpga(lp, se, csa->p, csa->q, csa->trow.vec,
               csa->tcol.vec) <= 1e-3)
#endif
            {  /* successful updating */
               refct--;
            }
            else
            {  /* new weights are inaccurate; reset reference space */
               se->valid = 0;
            }
         }
         else
         {  /* too many updates; reset reference space */
            se->valid = 0;
         }
      }
      /* update matrix N for adjacent basis, if used */
      if (nt != NULL)
         spx_update_nt_fpga(lp, nt, csa->p, csa->q);
skip: /* change current basis header to adjacent one */
      spx_change_basis_fpga(lp, csa->p, csa->p_flag, csa->q);
      /* and update factorization of the basis matrix */
      if (csa->p > 0)
         spx_update_invb_fpga(lp, csa->p, head[csa->p]);
#if 1
      if (perturb <= 0)
      {  if (csa->phase == 1)
         {  int cnt;
            /* adjust penalty function coefficients */
            cnt = adjust_penalty_fpga(csa, csa->tcol.nnz, csa->tcol.ind,
               0.99 * tol_bnd, 0.99 * tol_bnd1);
            if (cnt)
            {  /* some coefficients were changed, so invalidate reduced
                * costs of non-basic variables */
               /*xprintf("... cnt = %d\n", cnt);*/
               csa->d_st = 0;
            }
         }
      }
      else
      {  /* FIXME */
         play_bounds_fpga(csa, 0);
      }
#endif
      /* simplex iteration complete */
      csa->it_cnt++;
      goto loop;
fini: /* restore original objective function */
      memcpy(c, csa->orig_c, (1+n) * sizeof(double));
      /* compute reduced costs of non-basic variables and determine
       * solution dual status, if necessary */
      if (csa->p_stat != GLP_UNDEF && csa->d_stat == GLP_UNDEF)
      {  xassert(ret != GLP_EFAIL);
         spx_eval_pi_fpga(lp, pi);
         for (j = 1; j <= n-m; j++)
            d[j] = spx_eval_dj_fpga(lp, pi, j);
         csa->num = spx_chuzc_sel_fpga(lp, d, tol_dj, tol_dj1, NULL);
         csa->d_stat = (csa->num == 0 ? GLP_FEAS : GLP_INFEAS);
      }
      return ret;
}