diff options
author | Pasha <pasha@member.fsf.org> | 2022-10-25 18:48:39 +0000 |
---|---|---|
committer | Pasha <pasha@member.fsf.org> | 2022-10-25 18:48:39 +0000 |
commit | 4550eacc18535ea8480c8303c62adbd046fa031d (patch) | |
tree | 39c850531e37a9ae6d390ac35cf21a6029d5b5e7 /simplex-dev/src | |
download | oneapi-4550eacc18535ea8480c8303c62adbd046fa031d.tar.gz oneapi-4550eacc18535ea8480c8303c62adbd046fa031d.tar.bz2 |
initial test
Diffstat (limited to 'simplex-dev/src')
-rwxr-xr-x | simplex-dev/src/CMakeLists.txt | 95 | ||||
-rw-r--r-- | simplex-dev/src/host.cpp | 123 | ||||
-rw-r--r-- | simplex-dev/src/kernel.cpp | 216 | ||||
-rwxr-xr-x | simplex-dev/src/kernel.hpp | 13 |
4 files changed, 447 insertions, 0 deletions
diff --git a/simplex-dev/src/CMakeLists.txt b/simplex-dev/src/CMakeLists.txt new file mode 100755 index 0000000..becd251 --- /dev/null +++ b/simplex-dev/src/CMakeLists.txt @@ -0,0 +1,95 @@ +set(DEVICE_SOURCE_FILE kernel.cpp) +set(KERNEL_HEADER_FILE kernel.hpp) +set(HOST_SOURCE_FILE host.cpp) +set(TARGET_NAME simplexdev) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA board selection +if(NOT DEFINED FPGA_BOARD) + set(FPGA_BOARD "intel_a10gx_pac:pac_a10") + message(STATUS "FPGA_BOARD was not specified.\ + \nConfiguring the design to run on the default FPGA board ${FPGA_BOARD} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \ + \nPlease refer to the README for information on board selection.") +else() + message(STATUS "Configuring the design to run on FPGA board ${FPGA_BOARD}") +endif() + +# This is a Windows-specific flag that enables exception handling in host code +if(WIN32) + set(WIN_FLAG "/EHsc") +endif() + +# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. +# For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") +set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga") +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +# dpcpp -fintelfpga -DFPGA_EMULATOR host.cpp kernel.cpp -o fast_recompile.fpga_emu +# CMake executes: +# [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o host.cpp.o -c host.cpp +# [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o kernel.cpp.o -c kernel.cpp +# [link] dpcpp -fintelfpga host.cpp.o kernel.cpp.o -o fast_recompile.fpga_emu +add_executable(${EMULATOR_TARGET} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +# dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=early host.cpp kernel.cpp -o fast_compile_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to DPC++. +add_executable(${FPGA_EARLY_IMAGE} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE}) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early") +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile manually: +# dpcpp -fintelfpga -c host.cpp -o host.o +# dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=image kernel.cpp -o dev_image.a +# dpcpp -fintelfpga host.o dev_image.a -o fast_recompile.fpga + +if(WIN32) + set(FPGA_TARGET ${FPGA_TARGET}.exe) +endif() +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set(HOST_OBJ "host.o") +set(DEVICE_OBJ "dev.o") +set(DEVICE_IMAGE_OBJ "dev_image.a") + +set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}") +separate_arguments(CMAKE_CXX_FLAGS_LIST) +set(HARDWARE_COMPILE_FLAGS_LIST "${HARDWARE_COMPILE_FLAGS}") +separate_arguments(HARDWARE_COMPILE_FLAGS_LIST) +set(HARDWARE_LINK_FLAGS_LIST "${HARDWARE_LINK_FLAGS}") +separate_arguments(HARDWARE_LINK_FLAGS_LIST) + + +add_custom_command(OUTPUT ${HOST_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_COMPILE_FLAGS_LIST} -c ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_OBJ} + DEPENDS ${HOST_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + +add_custom_command(OUTPUT ${DEVICE_IMAGE_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link=image ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_IMAGE_OBJ} + DEPENDS ${DEVICE_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + +add_custom_command(OUTPUT ${FPGA_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} -fintelfpga ${HOST_OBJ} ${DEVICE_IMAGE_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} + DEPENDS ${HOST_OBJ} ${DEVICE_IMAGE_OBJ}) diff --git a/simplex-dev/src/host.cpp b/simplex-dev/src/host.cpp new file mode 100644 index 0000000..8d0507b --- /dev/null +++ b/simplex-dev/src/host.cpp @@ -0,0 +1,123 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include <iostream> +#include <vector> +#include <string> +#include <type_traits> + +#include <CL/sycl.hpp> +#include <sycl/ext/intel/fpga_extensions.hpp> + +// dpc_common.hpp can be found in the dev-utilities include folder. +// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp +#include "dpc_common.hpp" + +// This code sample demonstrates how to split the host and FPGA kernel code into +// separate compilation units so that they can be separately recompiled. +// Consult the README for a detailed discussion. +// - host.cpp (this file) contains exclusively code that executes on the host. +// - kernel.cpp contains almost exclusively code that executes on the device. +// - kernel.hpp contains only the forward declaration of a function containing +// the device code. +#include "kernel.hpp" + +using namespace sycl; + +template <typename K> +void printMatrix(std::vector<K> &vec, int col, std::string msg) { + std::cout << msg << ":" << std::endl << "[" << std::endl; + for (size_t i=0; i<vec.size(); ++i) { + std::cout << vec.at(i); + if (i<vec.size()-1 && vec.size() > 1) { + std::cout << ",\t"; + } + if (i%col == col-1) { + std::cout << std::endl; + } + } + std::cout << "]" << std::endl; +} + +template <typename K> +void printVec(std::vector<K> &vec, std::string msg) { + std::cout << msg << ": "; + std::cout << "["; + + for (size_t i=0; i<vec.size(); ++i) { + std::cout << vec.at(i); + if (i<vec.size()-1 && vec.size() > 1) { + std::cout << ", "; + } + } + std::cout << "]" << std::endl; +} + +int main() { + std::vector<float> a = { 2, 1, 1, 1, 0, 0, + 1, 3, 2, 0, 1, 0, + 2, 1, 2, 0, 0, 1}; + + std::vector<float> c = {-6, -5, -4, 0, 0, 0}; + std::vector<float> b = {180, 300, 240}; + + std::vector<int> resultFlags = {-1, -1, -1}; + + // Select either the FPGA emulator or FPGA device +#if defined(FPGA_EMULATOR) + ext::intel::fpga_emulator_selector device_selector; +#else + ext::intel::fpga_selector device_selector; +#endif + + try { + // Create a queue bound to the chosen device. + // If the device is unavailable, a SYCL runtime exception is thrown. + queue q(device_selector, dpc_common::exception_handler); + + + // make sure the device supports USM device allocations + device d = q.get_device(); + if (!d.get_info<info::device::usm_device_allocations>()) { + std::cerr << "ERROR: The selected device does not support USM device" + << " allocations\n"; + return 1; + } + + printMatrix(a, 6, "a"); + printVec(resultFlags, "result flags"); + // The definition of this function is in a different compilation unit, + // so host and device code can be separately compiled. + double timePassed = RunKernel(q, a, b, c, resultFlags); + + std::cout << "------------------------" << std::endl; + printMatrix(a, 6, "a"); + printVec(resultFlags, "result flags"); + + std::cout << std::endl << std::endl; + std::cout << "timePassed: " << timePassed << std::endl; + + + } catch (exception const &e) { + // Catches exceptions in the host code + std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.code().value() == CL_DEVICE_NOT_FOUND) { + std::cerr << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; + std::cerr << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + + + std::cout << "done\n"; + return 0; +} diff --git a/simplex-dev/src/kernel.cpp b/simplex-dev/src/kernel.cpp new file mode 100644 index 0000000..86bd98e --- /dev/null +++ b/simplex-dev/src/kernel.cpp @@ -0,0 +1,216 @@ +#include <sycl/ext/intel/fpga_extensions.hpp> +#include <chrono> +#include "kernel.hpp" + + +template<typename T> +SYCL_EXTERNAL bool checkOptimality(device_ptr<T> C, int size) { + bool isOptimal = false; + int positveValueCount = 0; + //check if the coefficients of the objective function are negative + for(int i=0; i<size;i++){ + float value = C[i]; + if(value >= 0){ + positveValueCount++; + } + } + //if all the constraints are positive now,the table is optimal + if(positveValueCount == size){ + isOptimal = true; + } + return isOptimal; +} + + +template<typename T> +SYCL_EXTERNAL int findPivotColumn(device_ptr<T> C, int size) { + int location = 0; + float minimum = C[0]; + for(int i=1; i<size; ++i) { + if(C[i]<minimum) { + minimum = C[i]; + location = i; + } + } + return location; +} + + +//find the row with the pivot value.The least value item's row in the B array +template<typename T> +SYCL_EXTERNAL int findPivotRow(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotColumn, int rows, int cols, bool *isUnbounded) { + int negativeValueCount = 0; + for (int i = 0; i < rows; i++) { + // 2d to 1d array index mapping + int pivotColumnIndex = (i*cols)+pivotColumn; + if (A[pivotColumnIndex] <= 0) { + negativeValueCount += 1; + } + } + int location = 0; + //checking the unbound condition if all the values are negative ones + if (negativeValueCount == rows) { + *isUnbounded = true; + } else { + float minimum = 99999999.0; + + for (int i = 0; i < rows; ++i) { + // 2d to 1d array index mapping + int pivotColumnIndex = (i*cols)+pivotColumn; + float tmpACols = A[pivotColumnIndex]; + if (tmpACols > 0) { + float result = B[i] / tmpACols; + if (result > 0 && result < minimum) { + minimum = result; + location = i; + } + } + } + } + return location; +} + +template<typename T> +SYCL_EXTERNAL void doPivotting(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotRow, int pivotColumn, int rows, int cols) { + int columnIndex = (pivotRow*cols)+pivotColumn; + float pivetValue = A[columnIndex]; + + float pivotRowVals[6]; //the column with the pivot + float pivotColVals[3]; //the row with the pivot + float rowNew[6]; //the row after processing the pivot value + + float maximum = 0; + maximum = maximum-(C[pivotColumn]*(B[pivotRow]/pivetValue)); //set the maximum step by step + + + //get the row that has the pivot value + for (int i = 0; i < cols; ++i) { + int pivotRowIndex = (pivotRow*cols)+i; + pivotRowVals[i] = A[pivotRowIndex]; + } + //get the column that has the pivot value + for (int j = 0; j < rows; ++j) { + int pivotColIndex = (j*cols)+pivotColumn; + pivotColVals[j] = A[pivotColIndex]; + } + + //set the row values that has the pivot value divided by the pivot value and put into new row + for (int k = 0; k < cols; ++k) { + rowNew[k] = pivotRowVals[k]/pivetValue; + } + + B[pivotRow] = B[pivotRow]/pivetValue; + + //process the other coefficients in the A array by subtracting + for (int m=0; m < rows; ++m) { + //ignore the pivot row as we already calculated that + if (m != pivotRow) { + for (int p = 0; p<cols; ++p) { + float multiplyValue = pivotColVals[m]; + int indexA_M_P = (m*cols)+p; + A[indexA_M_P] = A[indexA_M_P] - (multiplyValue * rowNew[p]); + //C[p] = C[p] - (multiplyValue*C[pivotRow]); + //B[i] = B[i] - (multiplyValue*B[pivotRow]); + } + + } + } + + //process the values of the B array + for (int i = 0; i<rows; ++i) { // rows = B.size() + if (i != pivotRow) { + float multiplyValue = pivotColVals[i]; + B[i] = B[i]-(multiplyValue*B[pivotRow]); + + } + } + //the least coefficient of the constraints of the objective function + float multiplyValue = C[pivotColumn]; + //process the C array + for (int i = 0; i < C.size(); i++) { + C[i] = C[i]-(multiplyValue * rowNew[i]); + + } + + //replacing the pivot row in the new calculated A array + for (int i = 0; i<cols; ++i) { + int indexA_pivotRow_i = (pivotRow*cols)+i; + A[indexA_pivotRow_i] = rowNew[i]; + } +} + +// Forward declare the kernel names in the global scope. This FPGA best practice +// reduces compiler name mangling in the optimization reports. +class SimplexCalc; + +double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost, + std::vector<T> &inCHost, std::vector<int>& resultFlags) { + + int rowSizeA = inBHost.size(); + int colSizeA = inCHost.size(); + + T *inADevice = malloc_device<T> (inAHost.size(), q); + T *inBDevice = malloc_device<T> (inBHost.size(), q); + T *inCDevice = malloc_device<T> (inCHost.size(), q); + int *inResultFlagsDevice = malloc_device<int> (resultFlags.size(), q); + + if (inADevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inADevice'\n"; + std::terminate(); + } + if (inBDevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inBDevice'\n"; + std::terminate(); + } + if (inCDevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inCDevice'\n"; + std::terminate(); + } + + auto start = std::chrono::high_resolution_clock::now(); + + q.memcpy(inADevice, inAHost.data(), inAHost.size()*sizeof(T)).wait(); + q.memcpy(inBDevice, inBHost.data(), inBHost.size() * sizeof(T)).wait(); + q.memcpy(inCDevice, inCHost.data(), inCHost.size() * sizeof(T)).wait(); + q.memcpy(inResultFlagsDevice, resultFlags.data(), resultFlags.size()*sizeof(int)).wait(); + + q.submit([&](handler &h) { + h.single_task < SimplexCalc > ([=]()[[intel::kernel_args_restrict]] { + device_ptr<T> inA(inADevice); + device_ptr<T> inB(inBDevice); + device_ptr<T> inC(inCDevice); + device_ptr<int> inResultFlags(inResultFlagsDevice); + + bool tempIsOptimizal = checkOptimality(inC, colSizeA); + if (tempIsOptimizal) { + inResultFlags[0] = 1; + return; + } else { + inResultFlags[0] = 0; + } + + int pivotColumn = findPivotColumn(inC, colSizeA); + inResultFlags[1] = pivotColumn; + + inA[0] = 43.0; //test only + bool isUnbounded = true; + int pivotRow = findPivotRow(inA, inB, inC, pivotColumn, rowSizeA, colSizeA, &isUnbounded); + inResultFlags[2] = pivotRow; + }); + }).wait(); + + q.memcpy(inAHost.data(), inADevice, inAHost.size()*sizeof(T)); + q.memcpy(inBHost.data(), inBDevice, inBHost.size()*sizeof(T)); + q.memcpy(inCHost.data(), inCDevice, inCHost.size()*sizeof(T)); + q.memcpy(resultFlags.data(), inResultFlagsDevice, resultFlags.size()*sizeof(int)); + + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double, std::milli> diff = end - start; + + sycl::free(inADevice, q); + sycl::free(inBDevice, q); + sycl::free(inCDevice, q); + sycl::free(inResultFlagsDevice, q); + + return diff.count(); +} diff --git a/simplex-dev/src/kernel.hpp b/simplex-dev/src/kernel.hpp new file mode 100755 index 0000000..77b8ca7 --- /dev/null +++ b/simplex-dev/src/kernel.hpp @@ -0,0 +1,13 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include <CL/sycl.hpp> + +using namespace sycl; + +typedef float T; + +double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost, + std::vector<T> &inCHost, std::vector<int>& resultFlags); |