From ef800d4ffafdbde7d7a172ad73bd984b1695c138 Mon Sep 17 00:00:00 2001
From: Pasha <pasha@member.fsf.org>
Date: Fri, 27 Jan 2023 00:54:07 +0000
Subject: simplex-glpk with modified glpk for fpga

---
 simplex-glpk/src/kernel.cpp | 263 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 263 insertions(+)
 create mode 100644 simplex-glpk/src/kernel.cpp

(limited to 'simplex-glpk/src/kernel.cpp')
diff --git a/simplex-glpk/src/kernel.cpp b/simplex-glpk/src/kernel.cpp
new file mode 100644
index 0000000..27e743a
--- /dev/null
+++ b/simplex-glpk/src/kernel.cpp
@@ -0,0 +1,263 @@
+#include <sycl/ext/intel/fpga_extensions.hpp>
+#include <chrono>
+#include "kernel.hpp"
+
+/*
+template<typename T>
+SYCL_EXTERNAL bool checkOptimality(device_ptr<T> C, int size) {
+    bool isOptimal = false;
+	int positveValueCount = 0;
+    //check if the coefficients of the objective function are negative
+	for(int i=0; i<size;i++){
+	    float value = C[i];
+	    if(value >= 0){
+	        positveValueCount++;
+	    }
+	}
+	//if all the constraints are positive now,the table is optimal
+	if(positveValueCount == size){
+	    isOptimal = true;
+	}
+    return isOptimal;
+}
+
+
+template<typename T>
+SYCL_EXTERNAL int findPivotColumn(device_ptr<T> C, int size) {
+	int location = 0;
+	float minimum = C[0];
+	for(int i=1; i<size; ++i) {
+		if(C[i]<minimum) {
+			minimum = C[i];
+			location = i;
+		}
+	}
+	return location;
+}
+
+
+//find the row with the pivot value.The least value item's row in the B array
+template<typename T>
+SYCL_EXTERNAL int findPivotRow(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotColumn, int rows, int cols, bool *isUnbounded) {
+	int negativeValueCount = 0;
+	for (int i = 0; i < rows; i++) {
+		// 2d to 1d array index mapping
+		int pivotColumnIndex = (i*cols)+pivotColumn;
+		if (A[pivotColumnIndex] <= 0) {
+			negativeValueCount += 1;
+		}
+	}
+	int location = 0;
+	//checking the unbound condition if all the values are negative ones
+	if (negativeValueCount == rows) {
+		*isUnbounded = true;
+	} else {
+		float minimum = 99999999.0;
+
+		for (int i = 0; i < rows; ++i) {
+			// 2d to 1d array index mapping
+			int pivotColumnIndex = (i*cols)+pivotColumn;
+			float tmpACols = A[pivotColumnIndex];
+			if (tmpACols > 0) {
+				float result = B[i] / tmpACols;
+				if (result > 0 && result < minimum) {
+					minimum = result;
+					location = i;
+				}
+			}
+		}
+	}
+	return location;
+}
+
+template<typename T>
+SYCL_EXTERNAL void doPivotting(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotRow, int pivotColumn, int rows, int cols) {
+	int columnIndex = (pivotRow*cols)+pivotColumn;
+	float pivetValue = A[columnIndex];
+
+	float pivotRowVals[6]; //the column with the pivot
+	float pivotColVals[3]; //the row with the pivot
+	float rowNew[6]; //the row after processing the pivot value
+
+	float maximum = 0;
+	maximum = maximum-(C[pivotColumn]*(B[pivotRow]/pivetValue)); //set the maximum step by step
+
+
+	//get the row that has the pivot value
+	for (int i = 0; i < cols; ++i) {
+		int pivotRowIndex = (pivotRow*cols)+i;
+		pivotRowVals[i] = A[pivotRowIndex];
+	}
+	//get the column that has the pivot value
+	for (int j = 0; j < rows; ++j) {
+		int pivotColIndex = (j*cols)+pivotColumn;
+		pivotColVals[j] = A[pivotColIndex];
+	}
+
+	//set the row values that has the pivot value divided by the pivot value and put into new row
+	for (int k = 0; k < cols; ++k) {
+		rowNew[k] = pivotRowVals[k]/pivetValue;
+	}
+
+	B[pivotRow] = B[pivotRow]/pivetValue;
+
+	//process the other coefficients in the A array by subtracting
+	for (int m=0; m < rows; ++m) {
+		//ignore the pivot row as we already calculated that
+		if (m != pivotRow) {
+			for (int p = 0; p<cols; ++p) {
+				float multiplyValue = pivotColVals[m];
+				int indexA_M_P = (m*cols)+p;
+				A[indexA_M_P] = A[indexA_M_P] - (multiplyValue * rowNew[p]);
+				//C[p] = C[p] - (multiplyValue*C[pivotRow]);
+				//B[i] = B[i] - (multiplyValue*B[pivotRow]);
+			}
+
+		}
+	}
+
+	//process the values of the B array
+	for (int i = 0; i<rows; ++i) {  // rows = B.size()
+		if (i != pivotRow) {
+			float multiplyValue = pivotColVals[i];
+			B[i] = B[i]-(multiplyValue*B[pivotRow]);
+
+		}
+	}
+	//the least coefficient of the constraints of the objective function
+	float multiplyValue = C[pivotColumn];
+	//process the C array
+	for (int i = 0; i < C.size(); i++) {
+		C[i] = C[i]-(multiplyValue * rowNew[i]);
+
+	}
+
+	//replacing the pivot row in the new calculated A array
+	for (int i = 0; i<cols; ++i) {
+		int indexA_pivotRow_i = (pivotRow*cols)+i;
+		A[indexA_pivotRow_i] = rowNew[i];
+	}
+}
+
+// Forward declare the kernel names in the global scope. This FPGA best practice
+// reduces compiler name mangling in the optimization reports.
+class SimplexCalc;
+
+double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost,
+		std::vector<T> &inCHost, std::vector<int>& resultFlags) {
+
+	int rowSizeA = inBHost.size();
+	int colSizeA = inCHost.size();
+
+	T *inADevice = malloc_device<T> (inAHost.size(), q);
+	T *inBDevice = malloc_device<T> (inBHost.size(), q);
+	T *inCDevice = malloc_device<T> (inCHost.size(), q);
+	int *inResultFlagsDevice = malloc_device<int> (resultFlags.size(), q);
+
+	if (inADevice == nullptr) {
+		std::cerr << "ERROR: failed to allocate space for 'inADevice'\n";
+		std::terminate();
+	}
+	if (inBDevice == nullptr) {
+		std::cerr << "ERROR: failed to allocate space for 'inBDevice'\n";
+		std::terminate();
+	}
+	if (inCDevice == nullptr) {
+		std::cerr << "ERROR: failed to allocate space for 'inCDevice'\n";
+		std::terminate();
+	}
+
+	auto start = std::chrono::high_resolution_clock::now();
+
+	q.memcpy(inADevice, inAHost.data(), inAHost.size()*sizeof(T)).wait();
+	q.memcpy(inBDevice, inBHost.data(), inBHost.size() * sizeof(T)).wait();
+	q.memcpy(inCDevice, inCHost.data(), inCHost.size() * sizeof(T)).wait();
+	q.memcpy(inResultFlagsDevice, resultFlags.data(), resultFlags.size()*sizeof(int)).wait();
+
+	q.submit([&](handler &h) {
+		h.single_task < SimplexCalc > ([=]()[[intel::kernel_args_restrict]] {
+			device_ptr<T> inA(inADevice);
+			device_ptr<T> inB(inBDevice);
+			device_ptr<T> inC(inCDevice);
+			device_ptr<int> inResultFlags(inResultFlagsDevice);
+
+			bool tempIsOptimizal = checkOptimality(inC, colSizeA);
+			if (tempIsOptimizal) {
+				inResultFlags[0] = 1;
+				return;
+			} else {
+				inResultFlags[0] = 0;
+			}
+
+			int pivotColumn = findPivotColumn(inC, colSizeA);
+			inResultFlags[1] = pivotColumn;
+
+			inA[0] = 43.0; //test only
+			bool isUnbounded = true;
+			int pivotRow = findPivotRow(inA, inB, inC, pivotColumn, rowSizeA, colSizeA, &isUnbounded);
+			inResultFlags[2] = pivotRow;
+		});
+	}).wait();
+
+	q.memcpy(inAHost.data(), inADevice, inAHost.size()*sizeof(T)).wait();
+	q.memcpy(inBHost.data(), inBDevice, inBHost.size()*sizeof(T)).wait();
+	q.memcpy(inCHost.data(), inCDevice, inCHost.size()*sizeof(T)).wait();
+	q.memcpy(resultFlags.data(), inResultFlagsDevice, resultFlags.size()*sizeof(int)).wait();
+
+	auto end = std::chrono::high_resolution_clock::now();
+	std::chrono::duration<double, std::milli> diff = end - start;
+
+	sycl::free(inADevice, q);
+	sycl::free(inBDevice, q);
+	sycl::free(inCDevice, q);
+	sycl::free(inResultFlagsDevice, q);
+
+	return diff.count();
+}
+*/
+
+
+// Forward declare the kernel names in the global scope. This FPGA best practice
+// reduces compiler name mangling in the optimization reports.
+class SimplexCalc;
+
+double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost,
+		std::vector<T> &inCHost, std::vector<int>& resultFlags) {
+
+    constexpr int value = 100000;
+	size_t itemSize = 1000;
+	range numItems{itemSize};
+
+
+    int *parallel = malloc_shared<int>(itemSize, q);
+    if (parallel == nullptr) {
+    	 sycl::free(parallel, q);
+         std::cout << "Shared memory allocation failure.\n";
+         return -1;
+    }
+
+	auto start = std::chrono::high_resolution_clock::now();
+
+	  // Use parallel_for to populate consecutive numbers starting with a specified
+	  // value in parallel on device. This executes the kernel.
+	  //    1st parameter is the number of work items to use.
+	  //    2nd parameter is the kernel, a lambda that specifies what to do per
+	  //    work item. The parameter of the lambda is the work item id.
+	  // SYCL supports unnamed lambda kernel by default.
+	  auto e = q.parallel_for(numItems, [=](auto i) {
+		  parallel[i] = value + i;
+	  });
+
+	  // q.parallel_for() is an asynchronous call. SYCL runtime enqueues and runs
+	  // the kernel asynchronously. Wait for the asynchronous call to complete.
+	  e.wait();
+
+
+	auto end = std::chrono::high_resolution_clock::now();
+	std::chrono::duration<double, std::milli> diff = end - start;
+
+	sycl::free(parallel, q);
+
+
+	return diff.count();
+}
-- 
cgit v1.2.1