summaryrefslogtreecommitdiff
path: root/simplex-dev/src
diff options
context:
space:
mode:
Diffstat (limited to 'simplex-dev/src')
-rwxr-xr-xsimplex-dev/src/CMakeLists.txt95
-rw-r--r--simplex-dev/src/host.cpp123
-rw-r--r--simplex-dev/src/kernel.cpp216
-rwxr-xr-xsimplex-dev/src/kernel.hpp13
4 files changed, 447 insertions, 0 deletions
diff --git a/simplex-dev/src/CMakeLists.txt b/simplex-dev/src/CMakeLists.txt
new file mode 100755
index 0000000..becd251
--- /dev/null
+++ b/simplex-dev/src/CMakeLists.txt
@@ -0,0 +1,95 @@
+set(DEVICE_SOURCE_FILE kernel.cpp)
+set(KERNEL_HEADER_FILE kernel.hpp)
+set(HOST_SOURCE_FILE host.cpp)
+set(TARGET_NAME simplexdev)
+set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(FPGA_TARGET ${TARGET_NAME}.fpga)
+
+# FPGA board selection
+if(NOT DEFINED FPGA_BOARD)
+ set(FPGA_BOARD "intel_a10gx_pac:pac_a10")
+ message(STATUS "FPGA_BOARD was not specified.\
+ \nConfiguring the design to run on the default FPGA board ${FPGA_BOARD} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \
+ \nPlease refer to the README for information on board selection.")
+else()
+ message(STATUS "Configuring the design to run on FPGA board ${FPGA_BOARD}")
+endif()
+
+# This is a Windows-specific flag that enables exception handling in host code
+if(WIN32)
+ set(WIN_FLAG "/EHsc")
+endif()
+
+# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages.
+# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
+# 2. The "link" stage invokes the compiler's FPGA backend before linking.
+# For this reason, FPGA backend flags must be passed as link flags in CMake.
+set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_LINK_FLAGS "-fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga")
+set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}")
+# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation
+
+###############################################################################
+### FPGA Emulator
+###############################################################################
+# To compile in a single command:
+# dpcpp -fintelfpga -DFPGA_EMULATOR host.cpp kernel.cpp -o fast_recompile.fpga_emu
+# CMake executes:
+# [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o host.cpp.o -c host.cpp
+# [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o kernel.cpp.o -c kernel.cpp
+# [link] dpcpp -fintelfpga host.cpp.o kernel.cpp.o -o fast_recompile.fpga_emu
+add_executable(${EMULATOR_TARGET} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE})
+set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}")
+set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+
+###############################################################################
+### Generate Report
+###############################################################################
+# To compile manually:
+# dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=early host.cpp kernel.cpp -o fast_compile_report.a
+set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a)
+# The compile output is not an executable, but an intermediate compilation result unique to DPC++.
+add_executable(${FPGA_EARLY_IMAGE} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE})
+add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE})
+set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS})
+set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early")
+# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus®
+
+
+###############################################################################
+### FPGA Hardware
+###############################################################################
+# To compile manually:
+# dpcpp -fintelfpga -c host.cpp -o host.o
+# dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=image kernel.cpp -o dev_image.a
+# dpcpp -fintelfpga host.o dev_image.a -o fast_recompile.fpga
+
+if(WIN32)
+ set(FPGA_TARGET ${FPGA_TARGET}.exe)
+endif()
+add_custom_target(fpga DEPENDS ${FPGA_TARGET})
+set(HOST_OBJ "host.o")
+set(DEVICE_OBJ "dev.o")
+set(DEVICE_IMAGE_OBJ "dev_image.a")
+
+set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}")
+separate_arguments(CMAKE_CXX_FLAGS_LIST)
+set(HARDWARE_COMPILE_FLAGS_LIST "${HARDWARE_COMPILE_FLAGS}")
+separate_arguments(HARDWARE_COMPILE_FLAGS_LIST)
+set(HARDWARE_LINK_FLAGS_LIST "${HARDWARE_LINK_FLAGS}")
+separate_arguments(HARDWARE_LINK_FLAGS_LIST)
+
+
+add_custom_command(OUTPUT ${HOST_OBJ}
+ COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_COMPILE_FLAGS_LIST} -c ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_OBJ}
+ DEPENDS ${HOST_SOURCE_FILE} ${KERNEL_HEADER_FILE})
+
+add_custom_command(OUTPUT ${DEVICE_IMAGE_OBJ}
+ COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link=image ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_IMAGE_OBJ}
+ DEPENDS ${DEVICE_SOURCE_FILE} ${KERNEL_HEADER_FILE})
+
+add_custom_command(OUTPUT ${FPGA_TARGET}
+ COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} -fintelfpga ${HOST_OBJ} ${DEVICE_IMAGE_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET}
+ DEPENDS ${HOST_OBJ} ${DEVICE_IMAGE_OBJ})
diff --git a/simplex-dev/src/host.cpp b/simplex-dev/src/host.cpp
new file mode 100644
index 0000000..8d0507b
--- /dev/null
+++ b/simplex-dev/src/host.cpp
@@ -0,0 +1,123 @@
+//==============================================================
+// Copyright Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// =============================================================
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <type_traits>
+
+#include <CL/sycl.hpp>
+#include <sycl/ext/intel/fpga_extensions.hpp>
+
+// dpc_common.hpp can be found in the dev-utilities include folder.
+// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp
+#include "dpc_common.hpp"
+
+// This code sample demonstrates how to split the host and FPGA kernel code into
+// separate compilation units so that they can be separately recompiled.
+// Consult the README for a detailed discussion.
+// - host.cpp (this file) contains exclusively code that executes on the host.
+// - kernel.cpp contains almost exclusively code that executes on the device.
+// - kernel.hpp contains only the forward declaration of a function containing
+// the device code.
+#include "kernel.hpp"
+
+using namespace sycl;
+
+template <typename K>
+void printMatrix(std::vector<K> &vec, int col, std::string msg) {
+ std::cout << msg << ":" << std::endl << "[" << std::endl;
+ for (size_t i=0; i<vec.size(); ++i) {
+ std::cout << vec.at(i);
+ if (i<vec.size()-1 && vec.size() > 1) {
+ std::cout << ",\t";
+ }
+ if (i%col == col-1) {
+ std::cout << std::endl;
+ }
+ }
+ std::cout << "]" << std::endl;
+}
+
+template <typename K>
+void printVec(std::vector<K> &vec, std::string msg) {
+ std::cout << msg << ": ";
+ std::cout << "[";
+
+ for (size_t i=0; i<vec.size(); ++i) {
+ std::cout << vec.at(i);
+ if (i<vec.size()-1 && vec.size() > 1) {
+ std::cout << ", ";
+ }
+ }
+ std::cout << "]" << std::endl;
+}
+
+int main() {
+ std::vector<float> a = { 2, 1, 1, 1, 0, 0,
+ 1, 3, 2, 0, 1, 0,
+ 2, 1, 2, 0, 0, 1};
+
+ std::vector<float> c = {-6, -5, -4, 0, 0, 0};
+ std::vector<float> b = {180, 300, 240};
+
+ std::vector<int> resultFlags = {-1, -1, -1};
+
+ // Select either the FPGA emulator or FPGA device
+#if defined(FPGA_EMULATOR)
+ ext::intel::fpga_emulator_selector device_selector;
+#else
+ ext::intel::fpga_selector device_selector;
+#endif
+
+ try {
+ // Create a queue bound to the chosen device.
+ // If the device is unavailable, a SYCL runtime exception is thrown.
+ queue q(device_selector, dpc_common::exception_handler);
+
+
+ // make sure the device supports USM device allocations
+ device d = q.get_device();
+ if (!d.get_info<info::device::usm_device_allocations>()) {
+ std::cerr << "ERROR: The selected device does not support USM device"
+ << " allocations\n";
+ return 1;
+ }
+
+ printMatrix(a, 6, "a");
+ printVec(resultFlags, "result flags");
+ // The definition of this function is in a different compilation unit,
+ // so host and device code can be separately compiled.
+ double timePassed = RunKernel(q, a, b, c, resultFlags);
+
+ std::cout << "------------------------" << std::endl;
+ printMatrix(a, 6, "a");
+ printVec(resultFlags, "result flags");
+
+ std::cout << std::endl << std::endl;
+ std::cout << "timePassed: " << timePassed << std::endl;
+
+
+ } catch (exception const &e) {
+ // Catches exceptions in the host code
+ std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n";
+
+ // Most likely the runtime couldn't find FPGA hardware!
+ if (e.code().value() == CL_DEVICE_NOT_FOUND) {
+ std::cerr << "If you are targeting an FPGA, please ensure that your "
+ "system has a correctly configured FPGA board.\n";
+ std::cerr << "Run sys_check in the oneAPI root directory to verify.\n";
+ std::cerr << "If you are targeting the FPGA emulator, compile with "
+ "-DFPGA_EMULATOR.\n";
+ }
+ std::terminate();
+ }
+
+
+
+ std::cout << "done\n";
+ return 0;
+}
diff --git a/simplex-dev/src/kernel.cpp b/simplex-dev/src/kernel.cpp
new file mode 100644
index 0000000..86bd98e
--- /dev/null
+++ b/simplex-dev/src/kernel.cpp
@@ -0,0 +1,216 @@
+#include <sycl/ext/intel/fpga_extensions.hpp>
+#include <chrono>
+#include "kernel.hpp"
+
+
+template<typename T>
+SYCL_EXTERNAL bool checkOptimality(device_ptr<T> C, int size) {
+ bool isOptimal = false;
+ int positveValueCount = 0;
+ //check if the coefficients of the objective function are negative
+ for(int i=0; i<size;i++){
+ float value = C[i];
+ if(value >= 0){
+ positveValueCount++;
+ }
+ }
+ //if all the constraints are positive now,the table is optimal
+ if(positveValueCount == size){
+ isOptimal = true;
+ }
+ return isOptimal;
+}
+
+
+template<typename T>
+SYCL_EXTERNAL int findPivotColumn(device_ptr<T> C, int size) {
+ int location = 0;
+ float minimum = C[0];
+ for(int i=1; i<size; ++i) {
+ if(C[i]<minimum) {
+ minimum = C[i];
+ location = i;
+ }
+ }
+ return location;
+}
+
+
+//find the row with the pivot value.The least value item's row in the B array
+template<typename T>
+SYCL_EXTERNAL int findPivotRow(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotColumn, int rows, int cols, bool *isUnbounded) {
+ int negativeValueCount = 0;
+ for (int i = 0; i < rows; i++) {
+ // 2d to 1d array index mapping
+ int pivotColumnIndex = (i*cols)+pivotColumn;
+ if (A[pivotColumnIndex] <= 0) {
+ negativeValueCount += 1;
+ }
+ }
+ int location = 0;
+ //checking the unbound condition if all the values are negative ones
+ if (negativeValueCount == rows) {
+ *isUnbounded = true;
+ } else {
+ float minimum = 99999999.0;
+
+ for (int i = 0; i < rows; ++i) {
+ // 2d to 1d array index mapping
+ int pivotColumnIndex = (i*cols)+pivotColumn;
+ float tmpACols = A[pivotColumnIndex];
+ if (tmpACols > 0) {
+ float result = B[i] / tmpACols;
+ if (result > 0 && result < minimum) {
+ minimum = result;
+ location = i;
+ }
+ }
+ }
+ }
+ return location;
+}
+
+template<typename T>
+SYCL_EXTERNAL void doPivotting(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotRow, int pivotColumn, int rows, int cols) {
+ int columnIndex = (pivotRow*cols)+pivotColumn;
+ float pivetValue = A[columnIndex];
+
+ float pivotRowVals[6]; //the column with the pivot
+ float pivotColVals[3]; //the row with the pivot
+ float rowNew[6]; //the row after processing the pivot value
+
+ float maximum = 0;
+ maximum = maximum-(C[pivotColumn]*(B[pivotRow]/pivetValue)); //set the maximum step by step
+
+
+ //get the row that has the pivot value
+ for (int i = 0; i < cols; ++i) {
+ int pivotRowIndex = (pivotRow*cols)+i;
+ pivotRowVals[i] = A[pivotRowIndex];
+ }
+ //get the column that has the pivot value
+ for (int j = 0; j < rows; ++j) {
+ int pivotColIndex = (j*cols)+pivotColumn;
+ pivotColVals[j] = A[pivotColIndex];
+ }
+
+ //set the row values that has the pivot value divided by the pivot value and put into new row
+ for (int k = 0; k < cols; ++k) {
+ rowNew[k] = pivotRowVals[k]/pivetValue;
+ }
+
+ B[pivotRow] = B[pivotRow]/pivetValue;
+
+ //process the other coefficients in the A array by subtracting
+ for (int m=0; m < rows; ++m) {
+ //ignore the pivot row as we already calculated that
+ if (m != pivotRow) {
+ for (int p = 0; p<cols; ++p) {
+ float multiplyValue = pivotColVals[m];
+ int indexA_M_P = (m*cols)+p;
+ A[indexA_M_P] = A[indexA_M_P] - (multiplyValue * rowNew[p]);
+ //C[p] = C[p] - (multiplyValue*C[pivotRow]);
+ //B[i] = B[i] - (multiplyValue*B[pivotRow]);
+ }
+
+ }
+ }
+
+ //process the values of the B array
+ for (int i = 0; i<rows; ++i) { // rows = B.size()
+ if (i != pivotRow) {
+ float multiplyValue = pivotColVals[i];
+ B[i] = B[i]-(multiplyValue*B[pivotRow]);
+
+ }
+ }
+ //the least coefficient of the constraints of the objective function
+ float multiplyValue = C[pivotColumn];
+ //process the C array
+ for (int i = 0; i < C.size(); i++) {
+ C[i] = C[i]-(multiplyValue * rowNew[i]);
+
+ }
+
+ //replacing the pivot row in the new calculated A array
+ for (int i = 0; i<cols; ++i) {
+ int indexA_pivotRow_i = (pivotRow*cols)+i;
+ A[indexA_pivotRow_i] = rowNew[i];
+ }
+}
+
+// Forward declare the kernel names in the global scope. This FPGA best practice
+// reduces compiler name mangling in the optimization reports.
+class SimplexCalc;
+
+double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost,
+ std::vector<T> &inCHost, std::vector<int>& resultFlags) {
+
+ int rowSizeA = inBHost.size();
+ int colSizeA = inCHost.size();
+
+ T *inADevice = malloc_device<T> (inAHost.size(), q);
+ T *inBDevice = malloc_device<T> (inBHost.size(), q);
+ T *inCDevice = malloc_device<T> (inCHost.size(), q);
+ int *inResultFlagsDevice = malloc_device<int> (resultFlags.size(), q);
+
+ if (inADevice == nullptr) {
+ std::cerr << "ERROR: failed to allocate space for 'inADevice'\n";
+ std::terminate();
+ }
+ if (inBDevice == nullptr) {
+ std::cerr << "ERROR: failed to allocate space for 'inBDevice'\n";
+ std::terminate();
+ }
+ if (inCDevice == nullptr) {
+ std::cerr << "ERROR: failed to allocate space for 'inCDevice'\n";
+ std::terminate();
+ }
+
+ auto start = std::chrono::high_resolution_clock::now();
+
+ q.memcpy(inADevice, inAHost.data(), inAHost.size()*sizeof(T)).wait();
+ q.memcpy(inBDevice, inBHost.data(), inBHost.size() * sizeof(T)).wait();
+ q.memcpy(inCDevice, inCHost.data(), inCHost.size() * sizeof(T)).wait();
+ q.memcpy(inResultFlagsDevice, resultFlags.data(), resultFlags.size()*sizeof(int)).wait();
+
+ q.submit([&](handler &h) {
+ h.single_task < SimplexCalc > ([=]()[[intel::kernel_args_restrict]] {
+ device_ptr<T> inA(inADevice);
+ device_ptr<T> inB(inBDevice);
+ device_ptr<T> inC(inCDevice);
+ device_ptr<int> inResultFlags(inResultFlagsDevice);
+
+ bool tempIsOptimizal = checkOptimality(inC, colSizeA);
+ if (tempIsOptimizal) {
+ inResultFlags[0] = 1;
+ return;
+ } else {
+ inResultFlags[0] = 0;
+ }
+
+ int pivotColumn = findPivotColumn(inC, colSizeA);
+ inResultFlags[1] = pivotColumn;
+
+ inA[0] = 43.0; //test only
+ bool isUnbounded = true;
+ int pivotRow = findPivotRow(inA, inB, inC, pivotColumn, rowSizeA, colSizeA, &isUnbounded);
+ inResultFlags[2] = pivotRow;
+ });
+ }).wait();
+
+ q.memcpy(inAHost.data(), inADevice, inAHost.size()*sizeof(T));
+ q.memcpy(inBHost.data(), inBDevice, inBHost.size()*sizeof(T));
+ q.memcpy(inCHost.data(), inCDevice, inCHost.size()*sizeof(T));
+ q.memcpy(resultFlags.data(), inResultFlagsDevice, resultFlags.size()*sizeof(int));
+
+ auto end = std::chrono::high_resolution_clock::now();
+ std::chrono::duration<double, std::milli> diff = end - start;
+
+ sycl::free(inADevice, q);
+ sycl::free(inBDevice, q);
+ sycl::free(inCDevice, q);
+ sycl::free(inResultFlagsDevice, q);
+
+ return diff.count();
+}
diff --git a/simplex-dev/src/kernel.hpp b/simplex-dev/src/kernel.hpp
new file mode 100755
index 0000000..77b8ca7
--- /dev/null
+++ b/simplex-dev/src/kernel.hpp
@@ -0,0 +1,13 @@
+//==============================================================
+// Copyright Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// =============================================================
+#include <CL/sycl.hpp>
+
+using namespace sycl;
+
+typedef float T;
+
+double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost,
+ std::vector<T> &inCHost, std::vector<int>& resultFlags);