4 files changed, 447 insertions, 0 deletions
diff --git a/simplex-dev/src/CMakeLists.txt b/simplex-dev/src/CMakeLists.txt
new file mode 100755
index 0000000..becd251
--- /dev/null
+++ b/simplex-dev/src/CMakeLists.txt
@@ -0,0 +1,95 @@
+set(DEVICE_SOURCE_FILE kernel.cpp)
+set(KERNEL_HEADER_FILE kernel.hpp)
+set(HOST_SOURCE_FILE host.cpp)
+set(TARGET_NAME simplexdev)
+set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(FPGA_TARGET ${TARGET_NAME}.fpga)
+
+# FPGA board selection
+if(NOT DEFINED FPGA_BOARD)
+    set(FPGA_BOARD "intel_a10gx_pac:pac_a10")
+    message(STATUS "FPGA_BOARD was not specified.\
+                    \nConfiguring the design to run on the default FPGA board ${FPGA_BOARD} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \
+                    \nPlease refer to the README for information on board selection.")
+else()
+    message(STATUS "Configuring the design to run on FPGA board ${FPGA_BOARD}")
+endif()
+
+# This is a Windows-specific flag that enables exception handling in host code
+if(WIN32)
+    set(WIN_FLAG "/EHsc")
+endif()
+
+# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages.
+# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
+# 2. The "link" stage invokes the compiler's FPGA backend before linking.
+#    For this reason, FPGA backend flags must be passed as link flags in CMake.
+set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_LINK_FLAGS "-fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga")
+set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}")
+# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation
+
+###############################################################################
+### FPGA Emulator
+###############################################################################
+# To compile in a single command:
+#    dpcpp -fintelfpga -DFPGA_EMULATOR host.cpp kernel.cpp -o fast_recompile.fpga_emu
+# CMake executes:
+#    [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o host.cpp.o -c host.cpp
+#    [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o kernel.cpp.o -c kernel.cpp
+#    [link]    dpcpp -fintelfpga host.cpp.o kernel.cpp.o -o fast_recompile.fpga_emu
+add_executable(${EMULATOR_TARGET} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE})
+set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}")
+set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+
+###############################################################################
+### Generate Report
+###############################################################################
+# To compile manually:
+#   dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=early host.cpp kernel.cpp -o fast_compile_report.a
+set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a)
+# The compile output is not an executable, but an intermediate compilation result unique to DPC++.
+add_executable(${FPGA_EARLY_IMAGE} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE})
+add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE})
+set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS})
+set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early")
+# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus®
+
+
+###############################################################################
+### FPGA Hardware
+###############################################################################
+# To compile manually:
+#   dpcpp -fintelfpga -c host.cpp -o host.o
+#   dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=image kernel.cpp -o dev_image.a
+#   dpcpp -fintelfpga host.o dev_image.a -o fast_recompile.fpga
+
+if(WIN32)
+    set(FPGA_TARGET ${FPGA_TARGET}.exe)
+endif()
+add_custom_target(fpga DEPENDS ${FPGA_TARGET})
+set(HOST_OBJ "host.o")
+set(DEVICE_OBJ "dev.o")
+set(DEVICE_IMAGE_OBJ "dev_image.a")
+
+set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}")
+separate_arguments(CMAKE_CXX_FLAGS_LIST)
+set(HARDWARE_COMPILE_FLAGS_LIST "${HARDWARE_COMPILE_FLAGS}")
+separate_arguments(HARDWARE_COMPILE_FLAGS_LIST)
+set(HARDWARE_LINK_FLAGS_LIST "${HARDWARE_LINK_FLAGS}")
+separate_arguments(HARDWARE_LINK_FLAGS_LIST)
+ 
+
+add_custom_command(OUTPUT ${HOST_OBJ}
+                   COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_COMPILE_FLAGS_LIST} -c ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_OBJ}
+                   DEPENDS ${HOST_SOURCE_FILE} ${KERNEL_HEADER_FILE})
+
+add_custom_command(OUTPUT ${DEVICE_IMAGE_OBJ}
+                   COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link=image ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_IMAGE_OBJ}
+                   DEPENDS ${DEVICE_SOURCE_FILE} ${KERNEL_HEADER_FILE})
+
+add_custom_command(OUTPUT ${FPGA_TARGET}
+                   COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} -fintelfpga ${HOST_OBJ} ${DEVICE_IMAGE_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET}
+                   DEPENDS ${HOST_OBJ} ${DEVICE_IMAGE_OBJ})
diff --git a/simplex-dev/src/host.cpp b/simplex-dev/src/host.cpp
new file mode 100644
index 0000000..8d0507b
--- /dev/null
+++ b/simplex-dev/src/host.cpp
@@ -0,0 +1,123 @@
+//==============================================================
+// Copyright Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// =============================================================
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <type_traits>
+
+#include <CL/sycl.hpp>
+#include <sycl/ext/intel/fpga_extensions.hpp>
+
+// dpc_common.hpp can be found in the dev-utilities include folder.
+// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp
+#include "dpc_common.hpp"
+
+// This code sample demonstrates how to split the host and FPGA kernel code into
+// separate compilation units so that they can be separately recompiled.
+// Consult the README for a detailed discussion.
+//  - host.cpp (this file) contains exclusively code that executes on the host.
+//  - kernel.cpp contains almost exclusively code that executes on the device.
+//  - kernel.hpp contains only the forward declaration of a function containing
+//    the device code.
+#include "kernel.hpp"
+
+using namespace sycl;
+
+template <typename K>
+void printMatrix(std::vector<K> &vec, int col, std::string msg) {
+	std::cout << msg << ":" << std::endl << "[" << std::endl;
+	for (size_t i=0; i<vec.size(); ++i) {
+		std::cout << vec.at(i);
+		if (i<vec.size()-1 && vec.size() > 1) {
+			std::cout << ",\t";
+		}
+		if (i%col == col-1) {
+			std::cout << std::endl;
+		}
+	}
+	std::cout << "]" << std::endl;
+}
+
+template <typename K>
+void printVec(std::vector<K> &vec, std::string msg) {
+	std::cout << msg << ": ";
+	std::cout << "[";
+
+	for (size_t i=0; i<vec.size(); ++i) {
+		std::cout << vec.at(i);
+		if (i<vec.size()-1 && vec.size() > 1) {
+			std::cout << ", ";
+		}
+	}
+	std::cout << "]" << std::endl;
+}
+
+int main() {
+	std::vector<float> a = { 2,  1,  1, 1, 0, 0,
+	                         1,  3,  2, 0, 1, 0,
+	                         2,  1,  2, 0, 0, 1};
+
+	std::vector<float> c = {-6, -5, -4, 0, 0, 0};
+	std::vector<float> b = {180, 300, 240};
+
+	std::vector<int> resultFlags = {-1, -1, -1};
+
+  // Select either the FPGA emulator or FPGA device
+#if defined(FPGA_EMULATOR)
+  ext::intel::fpga_emulator_selector device_selector;
+#else
+  ext::intel::fpga_selector device_selector;
+#endif
+
+  try {
+    // Create a queue bound to the chosen device.
+    // If the device is unavailable, a SYCL runtime exception is thrown.
+    queue q(device_selector, dpc_common::exception_handler);
+
+
+    // make sure the device supports USM device allocations
+    device d = q.get_device();
+    if (!d.get_info<info::device::usm_device_allocations>()) {
+      std::cerr << "ERROR: The selected device does not support USM device"
+                << " allocations\n";
+      return 1;
+    }
+
+    printMatrix(a, 6, "a");
+    printVec(resultFlags, "result flags");
+    // The definition of this function is in a different compilation unit,
+    // so host and device code can be separately compiled.
+    double timePassed = RunKernel(q, a, b, c, resultFlags);
+
+    std::cout << "------------------------" << std::endl;
+    printMatrix(a, 6, "a");
+    printVec(resultFlags, "result flags");
+
+    std::cout << std::endl << std::endl;
+	std::cout << "timePassed: " << timePassed << std::endl;
+
+
+  } catch (exception const &e) {
+    // Catches exceptions in the host code
+    std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n";
+
+    // Most likely the runtime couldn't find FPGA hardware!
+    if (e.code().value() == CL_DEVICE_NOT_FOUND) {
+      std::cerr << "If you are targeting an FPGA, please ensure that your "
+                   "system has a correctly configured FPGA board.\n";
+      std::cerr << "Run sys_check in the oneAPI root directory to verify.\n";
+      std::cerr << "If you are targeting the FPGA emulator, compile with "
+                   "-DFPGA_EMULATOR.\n";
+    }
+    std::terminate();
+  }
+
+
+
+  std::cout << "done\n";
+  return 0;
+}
diff --git a/simplex-dev/src/kernel.cpp b/simplex-dev/src/kernel.cpp
new file mode 100644
index 0000000..86bd98e
--- /dev/null
+++ b/simplex-dev/src/kernel.cpp
@@ -0,0 +1,216 @@
+#include <sycl/ext/intel/fpga_extensions.hpp>
+#include <chrono>
+#include "kernel.hpp"
+
+
+template<typename T>
+SYCL_EXTERNAL bool checkOptimality(device_ptr<T> C, int size) {
+    bool isOptimal = false;
+	int positveValueCount = 0;
+    //check if the coefficients of the objective function are negative
+	for(int i=0; i<size;i++){
+	    float value = C[i];
+	    if(value >= 0){
+	        positveValueCount++;
+	    }
+	}
+	//if all the constraints are positive now,the table is optimal
+	if(positveValueCount == size){
+	    isOptimal = true;
+	}
+    return isOptimal;
+}
+
+
+template<typename T>
+SYCL_EXTERNAL int findPivotColumn(device_ptr<T> C, int size) {
+	int location = 0;
+	float minimum = C[0];
+	for(int i=1; i<size; ++i) {
+		if(C[i]<minimum) {
+			minimum = C[i];
+			location = i;
+		}
+	}
+	return location;
+}
+
+
+//find the row with the pivot value.The least value item's row in the B array
+template<typename T>
+SYCL_EXTERNAL int findPivotRow(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotColumn, int rows, int cols, bool *isUnbounded) {
+	int negativeValueCount = 0;
+	for (int i = 0; i < rows; i++) {
+		// 2d to 1d array index mapping
+		int pivotColumnIndex = (i*cols)+pivotColumn;
+		if (A[pivotColumnIndex] <= 0) {
+			negativeValueCount += 1;
+		}
+	}
+	int location = 0;
+	//checking the unbound condition if all the values are negative ones
+	if (negativeValueCount == rows) {
+		*isUnbounded = true;
+	} else {
+		float minimum = 99999999.0;
+
+		for (int i = 0; i < rows; ++i) {
+			// 2d to 1d array index mapping
+			int pivotColumnIndex = (i*cols)+pivotColumn;
+			float tmpACols = A[pivotColumnIndex];
+			if (tmpACols > 0) {
+				float result = B[i] / tmpACols;
+				if (result > 0 && result < minimum) {
+					minimum = result;
+					location = i;
+				}
+			}
+		}
+	}
+	return location;
+}
+
+template<typename T>
+SYCL_EXTERNAL void doPivotting(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotRow, int pivotColumn, int rows, int cols) {
+	int columnIndex = (pivotRow*cols)+pivotColumn;
+	float pivetValue = A[columnIndex];
+
+	float pivotRowVals[6]; //the column with the pivot
+	float pivotColVals[3]; //the row with the pivot
+	float rowNew[6]; //the row after processing the pivot value
+
+	float maximum = 0;
+	maximum = maximum-(C[pivotColumn]*(B[pivotRow]/pivetValue)); //set the maximum step by step
+
+
+	//get the row that has the pivot value
+	for (int i = 0; i < cols; ++i) {
+		int pivotRowIndex = (pivotRow*cols)+i;
+		pivotRowVals[i] = A[pivotRowIndex];
+	}
+	//get the column that has the pivot value
+	for (int j = 0; j < rows; ++j) {
+		int pivotColIndex = (j*cols)+pivotColumn;
+		pivotColVals[j] = A[pivotColIndex];
+	}
+
+	//set the row values that has the pivot value divided by the pivot value and put into new row
+	for (int k = 0; k < cols; ++k) {
+		rowNew[k] = pivotRowVals[k]/pivetValue;
+	}
+
+	B[pivotRow] = B[pivotRow]/pivetValue;
+
+	//process the other coefficients in the A array by subtracting
+	for (int m=0; m < rows; ++m) {
+		//ignore the pivot row as we already calculated that
+		if (m != pivotRow) {
+			for (int p = 0; p<cols; ++p) {
+				float multiplyValue = pivotColVals[m];
+				int indexA_M_P = (m*cols)+p;
+				A[indexA_M_P] = A[indexA_M_P] - (multiplyValue * rowNew[p]);
+				//C[p] = C[p] - (multiplyValue*C[pivotRow]);
+				//B[i] = B[i] - (multiplyValue*B[pivotRow]);
+			}
+
+		}
+	}
+
+	//process the values of the B array
+	for (int i = 0; i<rows; ++i) {  // rows = B.size()
+		if (i != pivotRow) {
+			float multiplyValue = pivotColVals[i];
+			B[i] = B[i]-(multiplyValue*B[pivotRow]);
+
+		}
+	}
+	//the least coefficient of the constraints of the objective function
+	float multiplyValue = C[pivotColumn];
+	//process the C array
+	for (int i = 0; i < C.size(); i++) {
+		C[i] = C[i]-(multiplyValue * rowNew[i]);
+
+	}
+
+	//replacing the pivot row in the new calculated A array
+	for (int i = 0; i<cols; ++i) {
+		int indexA_pivotRow_i = (pivotRow*cols)+i;
+		A[indexA_pivotRow_i] = rowNew[i];
+	}
+}
+
+// Forward declare the kernel names in the global scope. This FPGA best practice
+// reduces compiler name mangling in the optimization reports.
+class SimplexCalc;
+
+double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost,
+		std::vector<T> &inCHost, std::vector<int>& resultFlags) {
+
+	int rowSizeA = inBHost.size();
+	int colSizeA = inCHost.size();
+
+	T *inADevice = malloc_device<T> (inAHost.size(), q);
+	T *inBDevice = malloc_device<T> (inBHost.size(), q);
+	T *inCDevice = malloc_device<T> (inCHost.size(), q);
+	int *inResultFlagsDevice = malloc_device<int> (resultFlags.size(), q);
+
+	if (inADevice == nullptr) {
+		std::cerr << "ERROR: failed to allocate space for 'inADevice'\n";
+		std::terminate();
+	}
+	if (inBDevice == nullptr) {
+		std::cerr << "ERROR: failed to allocate space for 'inBDevice'\n";
+		std::terminate();
+	}
+	if (inCDevice == nullptr) {
+		std::cerr << "ERROR: failed to allocate space for 'inCDevice'\n";
+		std::terminate();
+	}
+
+	auto start = std::chrono::high_resolution_clock::now();
+
+	q.memcpy(inADevice, inAHost.data(), inAHost.size()*sizeof(T)).wait();
+	q.memcpy(inBDevice, inBHost.data(), inBHost.size() * sizeof(T)).wait();
+	q.memcpy(inCDevice, inCHost.data(), inCHost.size() * sizeof(T)).wait();
+	q.memcpy(inResultFlagsDevice, resultFlags.data(), resultFlags.size()*sizeof(int)).wait();
+
+	q.submit([&](handler &h) {
+		h.single_task < SimplexCalc > ([=]()[[intel::kernel_args_restrict]] {
+			device_ptr<T> inA(inADevice);
+			device_ptr<T> inB(inBDevice);
+			device_ptr<T> inC(inCDevice);
+			device_ptr<int> inResultFlags(inResultFlagsDevice);
+
+			bool tempIsOptimizal = checkOptimality(inC, colSizeA);
+			if (tempIsOptimizal) {
+				inResultFlags[0] = 1;
+				return;
+			} else {
+				inResultFlags[0] = 0;
+			}
+
+			int pivotColumn = findPivotColumn(inC, colSizeA);
+			inResultFlags[1] = pivotColumn;
+
+			inA[0] = 43.0; //test only
+			bool isUnbounded = true;
+			int pivotRow = findPivotRow(inA, inB, inC, pivotColumn, rowSizeA, colSizeA, &isUnbounded);
+			inResultFlags[2] = pivotRow;
+		});
+	}).wait();
+
+	q.memcpy(inAHost.data(), inADevice, inAHost.size()*sizeof(T));
+	q.memcpy(inBHost.data(), inBDevice, inBHost.size()*sizeof(T));
+	q.memcpy(inCHost.data(), inCDevice, inCHost.size()*sizeof(T));
+	q.memcpy(resultFlags.data(), inResultFlagsDevice, resultFlags.size()*sizeof(int));
+
+	auto end = std::chrono::high_resolution_clock::now();
+	std::chrono::duration<double, std::milli> diff = end - start;
+
+	sycl::free(inADevice, q);
+	sycl::free(inBDevice, q);
+	sycl::free(inCDevice, q);
+	sycl::free(inResultFlagsDevice, q);
+
+	return diff.count();
+}
diff --git a/simplex-dev/src/kernel.hpp b/simplex-dev/src/kernel.hpp
new file mode 100755
index 0000000..77b8ca7
--- /dev/null
+++ b/simplex-dev/src/kernel.hpp
@@ -0,0 +1,13 @@
+//==============================================================
+// Copyright Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// =============================================================
+#include <CL/sycl.hpp>
+
+using namespace sycl;
+
+typedef float T;
+
+double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost,
+		std::vector<T> &inCHost, std::vector<int>& resultFlags);