diff options
Diffstat (limited to 'simplex-dev/src')
| -rwxr-xr-x | simplex-dev/src/CMakeLists.txt | 95 | ||||
| -rw-r--r-- | simplex-dev/src/host.cpp | 123 | ||||
| -rw-r--r-- | simplex-dev/src/kernel.cpp | 216 | ||||
| -rwxr-xr-x | simplex-dev/src/kernel.hpp | 13 | 
4 files changed, 447 insertions, 0 deletions
| diff --git a/simplex-dev/src/CMakeLists.txt b/simplex-dev/src/CMakeLists.txt new file mode 100755 index 0000000..becd251 --- /dev/null +++ b/simplex-dev/src/CMakeLists.txt @@ -0,0 +1,95 @@ +set(DEVICE_SOURCE_FILE kernel.cpp) +set(KERNEL_HEADER_FILE kernel.hpp) +set(HOST_SOURCE_FILE host.cpp) +set(TARGET_NAME simplexdev) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA board selection +if(NOT DEFINED FPGA_BOARD) +    set(FPGA_BOARD "intel_a10gx_pac:pac_a10") +    message(STATUS "FPGA_BOARD was not specified.\ +                    \nConfiguring the design to run on the default FPGA board ${FPGA_BOARD} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \ +                    \nPlease refer to the README for information on board selection.") +else() +    message(STATUS "Configuring the design to run on FPGA board ${FPGA_BOARD}") +endif() + +# This is a Windows-specific flag that enables exception handling in host code +if(WIN32) +    set(WIN_FLAG "/EHsc") +endif() + +# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. +#    For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") +set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga") +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +#    dpcpp -fintelfpga -DFPGA_EMULATOR host.cpp kernel.cpp -o fast_recompile.fpga_emu +# CMake executes: +#    [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o host.cpp.o -c host.cpp +#    [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o kernel.cpp.o -c kernel.cpp +#    [link]    dpcpp -fintelfpga host.cpp.o kernel.cpp.o -o fast_recompile.fpga_emu +add_executable(${EMULATOR_TARGET} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +#   dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=early host.cpp kernel.cpp -o fast_compile_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to DPC++. +add_executable(${FPGA_EARLY_IMAGE} ${HOST_SOURCE_FILE} ${DEVICE_SOURCE_FILE}) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early") +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile manually: +#   dpcpp -fintelfpga -c host.cpp -o host.o +#   dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=image kernel.cpp -o dev_image.a +#   dpcpp -fintelfpga host.o dev_image.a -o fast_recompile.fpga + +if(WIN32) +    set(FPGA_TARGET ${FPGA_TARGET}.exe) +endif() +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set(HOST_OBJ "host.o") +set(DEVICE_OBJ "dev.o") +set(DEVICE_IMAGE_OBJ "dev_image.a") + +set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}") +separate_arguments(CMAKE_CXX_FLAGS_LIST) +set(HARDWARE_COMPILE_FLAGS_LIST "${HARDWARE_COMPILE_FLAGS}") +separate_arguments(HARDWARE_COMPILE_FLAGS_LIST) +set(HARDWARE_LINK_FLAGS_LIST "${HARDWARE_LINK_FLAGS}") +separate_arguments(HARDWARE_LINK_FLAGS_LIST) +  + +add_custom_command(OUTPUT ${HOST_OBJ} +                   COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_COMPILE_FLAGS_LIST} -c ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_OBJ} +                   DEPENDS ${HOST_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + +add_custom_command(OUTPUT ${DEVICE_IMAGE_OBJ} +                   COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link=image ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_IMAGE_OBJ} +                   DEPENDS ${DEVICE_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + +add_custom_command(OUTPUT ${FPGA_TARGET} +                   COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS_LIST} -fintelfpga ${HOST_OBJ} ${DEVICE_IMAGE_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} +                   DEPENDS ${HOST_OBJ} ${DEVICE_IMAGE_OBJ}) diff --git a/simplex-dev/src/host.cpp b/simplex-dev/src/host.cpp new file mode 100644 index 0000000..8d0507b --- /dev/null +++ b/simplex-dev/src/host.cpp @@ -0,0 +1,123 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include <iostream> +#include <vector> +#include <string> +#include <type_traits> + +#include <CL/sycl.hpp> +#include <sycl/ext/intel/fpga_extensions.hpp> + +// dpc_common.hpp can be found in the dev-utilities include folder. +// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp +#include "dpc_common.hpp" + +// This code sample demonstrates how to split the host and FPGA kernel code into +// separate compilation units so that they can be separately recompiled. +// Consult the README for a detailed discussion. +//  - host.cpp (this file) contains exclusively code that executes on the host. +//  - kernel.cpp contains almost exclusively code that executes on the device. +//  - kernel.hpp contains only the forward declaration of a function containing +//    the device code. +#include "kernel.hpp" + +using namespace sycl; + +template <typename K> +void printMatrix(std::vector<K> &vec, int col, std::string msg) { +	std::cout << msg << ":" << std::endl << "[" << std::endl; +	for (size_t i=0; i<vec.size(); ++i) { +		std::cout << vec.at(i); +		if (i<vec.size()-1 && vec.size() > 1) { +			std::cout << ",\t"; +		} +		if (i%col == col-1) { +			std::cout << std::endl; +		} +	} +	std::cout << "]" << std::endl; +} + +template <typename K> +void printVec(std::vector<K> &vec, std::string msg) { +	std::cout << msg << ": "; +	std::cout << "["; + +	for (size_t i=0; i<vec.size(); ++i) { +		std::cout << vec.at(i); +		if (i<vec.size()-1 && vec.size() > 1) { +			std::cout << ", "; +		} +	} +	std::cout << "]" << std::endl; +} + +int main() { +	std::vector<float> a = { 2,  1,  1, 1, 0, 0, +	                         1,  3,  2, 0, 1, 0, +	                         2,  1,  2, 0, 0, 1}; + +	std::vector<float> c = {-6, -5, -4, 0, 0, 0}; +	std::vector<float> b = {180, 300, 240}; + +	std::vector<int> resultFlags = {-1, -1, -1}; + +  // Select either the FPGA emulator or FPGA device +#if defined(FPGA_EMULATOR) +  ext::intel::fpga_emulator_selector device_selector; +#else +  ext::intel::fpga_selector device_selector; +#endif + +  try { +    // Create a queue bound to the chosen device. +    // If the device is unavailable, a SYCL runtime exception is thrown. +    queue q(device_selector, dpc_common::exception_handler); + + +    // make sure the device supports USM device allocations +    device d = q.get_device(); +    if (!d.get_info<info::device::usm_device_allocations>()) { +      std::cerr << "ERROR: The selected device does not support USM device" +                << " allocations\n"; +      return 1; +    } + +    printMatrix(a, 6, "a"); +    printVec(resultFlags, "result flags"); +    // The definition of this function is in a different compilation unit, +    // so host and device code can be separately compiled. +    double timePassed = RunKernel(q, a, b, c, resultFlags); + +    std::cout << "------------------------" << std::endl; +    printMatrix(a, 6, "a"); +    printVec(resultFlags, "result flags"); + +    std::cout << std::endl << std::endl; +	std::cout << "timePassed: " << timePassed << std::endl; + + +  } catch (exception const &e) { +    // Catches exceptions in the host code +    std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + +    // Most likely the runtime couldn't find FPGA hardware! +    if (e.code().value() == CL_DEVICE_NOT_FOUND) { +      std::cerr << "If you are targeting an FPGA, please ensure that your " +                   "system has a correctly configured FPGA board.\n"; +      std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; +      std::cerr << "If you are targeting the FPGA emulator, compile with " +                   "-DFPGA_EMULATOR.\n"; +    } +    std::terminate(); +  } + + + +  std::cout << "done\n"; +  return 0; +} diff --git a/simplex-dev/src/kernel.cpp b/simplex-dev/src/kernel.cpp new file mode 100644 index 0000000..86bd98e --- /dev/null +++ b/simplex-dev/src/kernel.cpp @@ -0,0 +1,216 @@ +#include <sycl/ext/intel/fpga_extensions.hpp> +#include <chrono> +#include "kernel.hpp" + + +template<typename T> +SYCL_EXTERNAL bool checkOptimality(device_ptr<T> C, int size) { +    bool isOptimal = false; +	int positveValueCount = 0; +    //check if the coefficients of the objective function are negative +	for(int i=0; i<size;i++){ +	    float value = C[i]; +	    if(value >= 0){ +	        positveValueCount++; +	    } +	} +	//if all the constraints are positive now,the table is optimal +	if(positveValueCount == size){ +	    isOptimal = true; +	} +    return isOptimal; +} + + +template<typename T> +SYCL_EXTERNAL int findPivotColumn(device_ptr<T> C, int size) { +	int location = 0; +	float minimum = C[0]; +	for(int i=1; i<size; ++i) { +		if(C[i]<minimum) { +			minimum = C[i]; +			location = i; +		} +	} +	return location; +} + + +//find the row with the pivot value.The least value item's row in the B array +template<typename T> +SYCL_EXTERNAL int findPivotRow(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotColumn, int rows, int cols, bool *isUnbounded) { +	int negativeValueCount = 0; +	for (int i = 0; i < rows; i++) { +		// 2d to 1d array index mapping +		int pivotColumnIndex = (i*cols)+pivotColumn; +		if (A[pivotColumnIndex] <= 0) { +			negativeValueCount += 1; +		} +	} +	int location = 0; +	//checking the unbound condition if all the values are negative ones +	if (negativeValueCount == rows) { +		*isUnbounded = true; +	} else { +		float minimum = 99999999.0; + +		for (int i = 0; i < rows; ++i) { +			// 2d to 1d array index mapping +			int pivotColumnIndex = (i*cols)+pivotColumn; +			float tmpACols = A[pivotColumnIndex]; +			if (tmpACols > 0) { +				float result = B[i] / tmpACols; +				if (result > 0 && result < minimum) { +					minimum = result; +					location = i; +				} +			} +		} +	} +	return location; +} + +template<typename T> +SYCL_EXTERNAL void doPivotting(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotRow, int pivotColumn, int rows, int cols) { +	int columnIndex = (pivotRow*cols)+pivotColumn; +	float pivetValue = A[columnIndex]; + +	float pivotRowVals[6]; //the column with the pivot +	float pivotColVals[3]; //the row with the pivot +	float rowNew[6]; //the row after processing the pivot value + +	float maximum = 0; +	maximum = maximum-(C[pivotColumn]*(B[pivotRow]/pivetValue)); //set the maximum step by step + + +	//get the row that has the pivot value +	for (int i = 0; i < cols; ++i) { +		int pivotRowIndex = (pivotRow*cols)+i; +		pivotRowVals[i] = A[pivotRowIndex]; +	} +	//get the column that has the pivot value +	for (int j = 0; j < rows; ++j) { +		int pivotColIndex = (j*cols)+pivotColumn; +		pivotColVals[j] = A[pivotColIndex]; +	} + +	//set the row values that has the pivot value divided by the pivot value and put into new row +	for (int k = 0; k < cols; ++k) { +		rowNew[k] = pivotRowVals[k]/pivetValue; +	} + +	B[pivotRow] = B[pivotRow]/pivetValue; + +	//process the other coefficients in the A array by subtracting +	for (int m=0; m < rows; ++m) { +		//ignore the pivot row as we already calculated that +		if (m != pivotRow) { +			for (int p = 0; p<cols; ++p) { +				float multiplyValue = pivotColVals[m]; +				int indexA_M_P = (m*cols)+p; +				A[indexA_M_P] = A[indexA_M_P] - (multiplyValue * rowNew[p]); +				//C[p] = C[p] - (multiplyValue*C[pivotRow]); +				//B[i] = B[i] - (multiplyValue*B[pivotRow]); +			} + +		} +	} + +	//process the values of the B array +	for (int i = 0; i<rows; ++i) {  // rows = B.size() +		if (i != pivotRow) { +			float multiplyValue = pivotColVals[i]; +			B[i] = B[i]-(multiplyValue*B[pivotRow]); + +		} +	} +	//the least coefficient of the constraints of the objective function +	float multiplyValue = C[pivotColumn]; +	//process the C array +	for (int i = 0; i < C.size(); i++) { +		C[i] = C[i]-(multiplyValue * rowNew[i]); + +	} + +	//replacing the pivot row in the new calculated A array +	for (int i = 0; i<cols; ++i) { +		int indexA_pivotRow_i = (pivotRow*cols)+i; +		A[indexA_pivotRow_i] = rowNew[i]; +	} +} + +// Forward declare the kernel names in the global scope. This FPGA best practice +// reduces compiler name mangling in the optimization reports. +class SimplexCalc; + +double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost, +		std::vector<T> &inCHost, std::vector<int>& resultFlags) { + +	int rowSizeA = inBHost.size(); +	int colSizeA = inCHost.size(); + +	T *inADevice = malloc_device<T> (inAHost.size(), q); +	T *inBDevice = malloc_device<T> (inBHost.size(), q); +	T *inCDevice = malloc_device<T> (inCHost.size(), q); +	int *inResultFlagsDevice = malloc_device<int> (resultFlags.size(), q); + +	if (inADevice == nullptr) { +		std::cerr << "ERROR: failed to allocate space for 'inADevice'\n"; +		std::terminate(); +	} +	if (inBDevice == nullptr) { +		std::cerr << "ERROR: failed to allocate space for 'inBDevice'\n"; +		std::terminate(); +	} +	if (inCDevice == nullptr) { +		std::cerr << "ERROR: failed to allocate space for 'inCDevice'\n"; +		std::terminate(); +	} + +	auto start = std::chrono::high_resolution_clock::now(); + +	q.memcpy(inADevice, inAHost.data(), inAHost.size()*sizeof(T)).wait(); +	q.memcpy(inBDevice, inBHost.data(), inBHost.size() * sizeof(T)).wait(); +	q.memcpy(inCDevice, inCHost.data(), inCHost.size() * sizeof(T)).wait(); +	q.memcpy(inResultFlagsDevice, resultFlags.data(), resultFlags.size()*sizeof(int)).wait(); + +	q.submit([&](handler &h) { +		h.single_task < SimplexCalc > ([=]()[[intel::kernel_args_restrict]] { +			device_ptr<T> inA(inADevice); +			device_ptr<T> inB(inBDevice); +			device_ptr<T> inC(inCDevice); +			device_ptr<int> inResultFlags(inResultFlagsDevice); + +			bool tempIsOptimizal = checkOptimality(inC, colSizeA); +			if (tempIsOptimizal) { +				inResultFlags[0] = 1; +				return; +			} else { +				inResultFlags[0] = 0; +			} + +			int pivotColumn = findPivotColumn(inC, colSizeA); +			inResultFlags[1] = pivotColumn; + +			inA[0] = 43.0; //test only +			bool isUnbounded = true; +			int pivotRow = findPivotRow(inA, inB, inC, pivotColumn, rowSizeA, colSizeA, &isUnbounded); +			inResultFlags[2] = pivotRow; +		}); +	}).wait(); + +	q.memcpy(inAHost.data(), inADevice, inAHost.size()*sizeof(T)); +	q.memcpy(inBHost.data(), inBDevice, inBHost.size()*sizeof(T)); +	q.memcpy(inCHost.data(), inCDevice, inCHost.size()*sizeof(T)); +	q.memcpy(resultFlags.data(), inResultFlagsDevice, resultFlags.size()*sizeof(int)); + +	auto end = std::chrono::high_resolution_clock::now(); +	std::chrono::duration<double, std::milli> diff = end - start; + +	sycl::free(inADevice, q); +	sycl::free(inBDevice, q); +	sycl::free(inCDevice, q); +	sycl::free(inResultFlagsDevice, q); + +	return diff.count(); +} diff --git a/simplex-dev/src/kernel.hpp b/simplex-dev/src/kernel.hpp new file mode 100755 index 0000000..77b8ca7 --- /dev/null +++ b/simplex-dev/src/kernel.hpp @@ -0,0 +1,13 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include <CL/sycl.hpp> + +using namespace sycl; + +typedef float T; + +double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost, +		std::vector<T> &inCHost, std::vector<int>& resultFlags); | 
