From 4550eacc18535ea8480c8303c62adbd046fa031d Mon Sep 17 00:00:00 2001 From: Pasha Date: Tue, 25 Oct 2022 18:48:39 +0000 Subject: initial test --- simplex-dev/src/CMakeLists.txt | 95 ++++++++++++++++++ simplex-dev/src/host.cpp | 123 +++++++++++++++++++++++ simplex-dev/src/kernel.cpp | 216 +++++++++++++++++++++++++++++++++++++++++ simplex-dev/src/kernel.hpp | 13 +++ 4 files changed, 447 insertions(+) create mode 100755 simplex-dev/src/CMakeLists.txt create mode 100644 simplex-dev/src/host.cpp create mode 100644 simplex-dev/src/kernel.cpp create mode 100755 simplex-dev/src/kernel.hpp (limited to 'simplex-dev/src') diff --git a/simplex-dev/src/CMakeLists.txt b/simplex-dev/src/CMakeLists.txt new file mode 100755 index 0000000..becd251 --- /dev/null +++ b/simplex-dev/src/CMakeLists.txt @@ -0,0 +1,95 @@ +set(DEVICE_SOURCE_FILE kernel.cpp) +set(KERNEL_HEADER_FILE kernel.hpp) +set(HOST_SOURCE_FILE host.cpp) +set(TARGET_NAME simplexdev) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA board selection +if(NOT DEFINED FPGA_BOARD) + set(FPGA_BOARD "intel_a10gx_pac:pac_a10") + message(STATUS "FPGA_BOARD was not specified.\ + \nConfiguring the design to run on the default FPGA board ${FPGA_BOARD} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \ + \nPlease refer to the README for information on board selection.") +else() + message(STATUS "Configuring the design to run on FPGA board ${FPGA_BOARD}") +endif() + +# This is a Windows-specific flag that enables exception handling in host code +if(WIN32) + set(WIN_FLAG "/EHsc") +endif() + +# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. +# For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") +set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga") +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +# dpcpp -fintelfpga -DFPGA_EMULATOR host.cpp kernel.cpp -o fast_recompile.fpga_emu +# CMake executes: +# [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o host.cpp.o -c host.cpp +# [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o kernel.cpp.o -c kernel.cpp +# [link] dpcpp -fintelfpga host.cpp.o kernel.cpp.o -o fast_recompile.fpga_emu +add_executable(${EMULATOR_TARGET} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +# dpcpp -fintelfpga -Xshardware -Xsboard= -fsycl-link=early host.cpp kernel.cpp -o fast_compile_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to DPC++. +add_executable(${FPGA_EARLY_IMAGE} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE}) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early") +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile manually: +# dpcpp -fintelfpga -c host.cpp -o host.o +# dpcpp -fintelfpga -Xshardware -Xsboard= -fsycl-link=image kernel.cpp -o dev_image.a +# dpcpp -fintelfpga host.o dev_image.a -o fast_recompile.fpga + +if(WIN32) + set(FPGA_TARGET ${FPGA_TARGET}.exe) +endif() +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set(HOST_OBJ "host.o") +set(DEVICE_OBJ "dev.o") +set(DEVICE_IMAGE_OBJ "dev_image.a") + +set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}") +separate_arguments(CMAKE_CXX_FLAGS_LIST) +set(HARDWARE_COMPILE_FLAGS_LIST "${HARDWARE_COMPILE_FLAGS}") +separate_arguments(HARDWARE_COMPILE_FLAGS_LIST) +set(HARDWARE_LINK_FLAGS_LIST "${HARDWARE_LINK_FLAGS}") +separate_arguments(HARDWARE_LINK_FLAGS_LIST) + + +add_custom_command(OUTPUT ${HOST_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_COMPILE_FLAGS_LIST} -c ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_OBJ} + DEPENDS ${HOST_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + +add_custom_command(OUTPUT ${DEVICE_IMAGE_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link=image ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_IMAGE_OBJ} + DEPENDS ${DEVICE_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + +add_custom_command(OUTPUT ${FPGA_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} -fintelfpga ${HOST_OBJ} ${DEVICE_IMAGE_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} + DEPENDS ${HOST_OBJ} ${DEVICE_IMAGE_OBJ}) diff --git a/simplex-dev/src/host.cpp b/simplex-dev/src/host.cpp new file mode 100644 index 0000000..8d0507b --- /dev/null +++ b/simplex-dev/src/host.cpp @@ -0,0 +1,123 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include +#include +#include +#include + +#include +#include + +// dpc_common.hpp can be found in the dev-utilities include folder. +// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp +#include "dpc_common.hpp" + +// This code sample demonstrates how to split the host and FPGA kernel code into +// separate compilation units so that they can be separately recompiled. +// Consult the README for a detailed discussion. +// - host.cpp (this file) contains exclusively code that executes on the host. +// - kernel.cpp contains almost exclusively code that executes on the device. +// - kernel.hpp contains only the forward declaration of a function containing +// the device code. +#include "kernel.hpp" + +using namespace sycl; + +template +void printMatrix(std::vector &vec, int col, std::string msg) { + std::cout << msg << ":" << std::endl << "[" << std::endl; + for (size_t i=0; i 1) { + std::cout << ",\t"; + } + if (i%col == col-1) { + std::cout << std::endl; + } + } + std::cout << "]" << std::endl; +} + +template +void printVec(std::vector &vec, std::string msg) { + std::cout << msg << ": "; + std::cout << "["; + + for (size_t i=0; i 1) { + std::cout << ", "; + } + } + std::cout << "]" << std::endl; +} + +int main() { + std::vector a = { 2, 1, 1, 1, 0, 0, + 1, 3, 2, 0, 1, 0, + 2, 1, 2, 0, 0, 1}; + + std::vector c = {-6, -5, -4, 0, 0, 0}; + std::vector b = {180, 300, 240}; + + std::vector resultFlags = {-1, -1, -1}; + + // Select either the FPGA emulator or FPGA device +#if defined(FPGA_EMULATOR) + ext::intel::fpga_emulator_selector device_selector; +#else + ext::intel::fpga_selector device_selector; +#endif + + try { + // Create a queue bound to the chosen device. + // If the device is unavailable, a SYCL runtime exception is thrown. + queue q(device_selector, dpc_common::exception_handler); + + + // make sure the device supports USM device allocations + device d = q.get_device(); + if (!d.get_info()) { + std::cerr << "ERROR: The selected device does not support USM device" + << " allocations\n"; + return 1; + } + + printMatrix(a, 6, "a"); + printVec(resultFlags, "result flags"); + // The definition of this function is in a different compilation unit, + // so host and device code can be separately compiled. + double timePassed = RunKernel(q, a, b, c, resultFlags); + + std::cout << "------------------------" << std::endl; + printMatrix(a, 6, "a"); + printVec(resultFlags, "result flags"); + + std::cout << std::endl << std::endl; + std::cout << "timePassed: " << timePassed << std::endl; + + + } catch (exception const &e) { + // Catches exceptions in the host code + std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.code().value() == CL_DEVICE_NOT_FOUND) { + std::cerr << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; + std::cerr << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + + + std::cout << "done\n"; + return 0; +} diff --git a/simplex-dev/src/kernel.cpp b/simplex-dev/src/kernel.cpp new file mode 100644 index 0000000..86bd98e --- /dev/null +++ b/simplex-dev/src/kernel.cpp @@ -0,0 +1,216 @@ +#include +#include +#include "kernel.hpp" + + +template +SYCL_EXTERNAL bool checkOptimality(device_ptr C, int size) { + bool isOptimal = false; + int positveValueCount = 0; + //check if the coefficients of the objective function are negative + for(int i=0; i= 0){ + positveValueCount++; + } + } + //if all the constraints are positive now,the table is optimal + if(positveValueCount == size){ + isOptimal = true; + } + return isOptimal; +} + + +template +SYCL_EXTERNAL int findPivotColumn(device_ptr C, int size) { + int location = 0; + float minimum = C[0]; + for(int i=1; i +SYCL_EXTERNAL int findPivotRow(device_ptr A, device_ptr B, device_ptr C, int pivotColumn, int rows, int cols, bool *isUnbounded) { + int negativeValueCount = 0; + for (int i = 0; i < rows; i++) { + // 2d to 1d array index mapping + int pivotColumnIndex = (i*cols)+pivotColumn; + if (A[pivotColumnIndex] <= 0) { + negativeValueCount += 1; + } + } + int location = 0; + //checking the unbound condition if all the values are negative ones + if (negativeValueCount == rows) { + *isUnbounded = true; + } else { + float minimum = 99999999.0; + + for (int i = 0; i < rows; ++i) { + // 2d to 1d array index mapping + int pivotColumnIndex = (i*cols)+pivotColumn; + float tmpACols = A[pivotColumnIndex]; + if (tmpACols > 0) { + float result = B[i] / tmpACols; + if (result > 0 && result < minimum) { + minimum = result; + location = i; + } + } + } + } + return location; +} + +template +SYCL_EXTERNAL void doPivotting(device_ptr A, device_ptr B, device_ptr C, int pivotRow, int pivotColumn, int rows, int cols) { + int columnIndex = (pivotRow*cols)+pivotColumn; + float pivetValue = A[columnIndex]; + + float pivotRowVals[6]; //the column with the pivot + float pivotColVals[3]; //the row with the pivot + float rowNew[6]; //the row after processing the pivot value + + float maximum = 0; + maximum = maximum-(C[pivotColumn]*(B[pivotRow]/pivetValue)); //set the maximum step by step + + + //get the row that has the pivot value + for (int i = 0; i < cols; ++i) { + int pivotRowIndex = (pivotRow*cols)+i; + pivotRowVals[i] = A[pivotRowIndex]; + } + //get the column that has the pivot value + for (int j = 0; j < rows; ++j) { + int pivotColIndex = (j*cols)+pivotColumn; + pivotColVals[j] = A[pivotColIndex]; + } + + //set the row values that has the pivot value divided by the pivot value and put into new row + for (int k = 0; k < cols; ++k) { + rowNew[k] = pivotRowVals[k]/pivetValue; + } + + B[pivotRow] = B[pivotRow]/pivetValue; + + //process the other coefficients in the A array by subtracting + for (int m=0; m < rows; ++m) { + //ignore the pivot row as we already calculated that + if (m != pivotRow) { + for (int p = 0; p &inAHost, std::vector &inBHost, + std::vector &inCHost, std::vector& resultFlags) { + + int rowSizeA = inBHost.size(); + int colSizeA = inCHost.size(); + + T *inADevice = malloc_device (inAHost.size(), q); + T *inBDevice = malloc_device (inBHost.size(), q); + T *inCDevice = malloc_device (inCHost.size(), q); + int *inResultFlagsDevice = malloc_device (resultFlags.size(), q); + + if (inADevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inADevice'\n"; + std::terminate(); + } + if (inBDevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inBDevice'\n"; + std::terminate(); + } + if (inCDevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inCDevice'\n"; + std::terminate(); + } + + auto start = std::chrono::high_resolution_clock::now(); + + q.memcpy(inADevice, inAHost.data(), inAHost.size()*sizeof(T)).wait(); + q.memcpy(inBDevice, inBHost.data(), inBHost.size() * sizeof(T)).wait(); + q.memcpy(inCDevice, inCHost.data(), inCHost.size() * sizeof(T)).wait(); + q.memcpy(inResultFlagsDevice, resultFlags.data(), resultFlags.size()*sizeof(int)).wait(); + + q.submit([&](handler &h) { + h.single_task < SimplexCalc > ([=]()[[intel::kernel_args_restrict]] { + device_ptr inA(inADevice); + device_ptr inB(inBDevice); + device_ptr inC(inCDevice); + device_ptr inResultFlags(inResultFlagsDevice); + + bool tempIsOptimizal = checkOptimality(inC, colSizeA); + if (tempIsOptimizal) { + inResultFlags[0] = 1; + return; + } else { + inResultFlags[0] = 0; + } + + int pivotColumn = findPivotColumn(inC, colSizeA); + inResultFlags[1] = pivotColumn; + + inA[0] = 43.0; //test only + bool isUnbounded = true; + int pivotRow = findPivotRow(inA, inB, inC, pivotColumn, rowSizeA, colSizeA, &isUnbounded); + inResultFlags[2] = pivotRow; + }); + }).wait(); + + q.memcpy(inAHost.data(), inADevice, inAHost.size()*sizeof(T)); + q.memcpy(inBHost.data(), inBDevice, inBHost.size()*sizeof(T)); + q.memcpy(inCHost.data(), inCDevice, inCHost.size()*sizeof(T)); + q.memcpy(resultFlags.data(), inResultFlagsDevice, resultFlags.size()*sizeof(int)); + + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = end - start; + + sycl::free(inADevice, q); + sycl::free(inBDevice, q); + sycl::free(inCDevice, q); + sycl::free(inResultFlagsDevice, q); + + return diff.count(); +} diff --git a/simplex-dev/src/kernel.hpp b/simplex-dev/src/kernel.hpp new file mode 100755 index 0000000..77b8ca7 --- /dev/null +++ b/simplex-dev/src/kernel.hpp @@ -0,0 +1,13 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include + +using namespace sycl; + +typedef float T; + +double RunKernel(queue &q, std::vector &inAHost, std::vector &inBHost, + std::vector &inCHost, std::vector& resultFlags); -- cgit v1.2.1