From ef800d4ffafdbde7d7a172ad73bd984b1695c138 Mon Sep 17 00:00:00 2001 From: Pasha Date: Fri, 27 Jan 2023 00:54:07 +0000 Subject: simplex-glpk with modified glpk for fpga --- simplex-glpk/src/kernel.cpp | 263 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 simplex-glpk/src/kernel.cpp (limited to 'simplex-glpk/src/kernel.cpp') diff --git a/simplex-glpk/src/kernel.cpp b/simplex-glpk/src/kernel.cpp new file mode 100644 index 0000000..27e743a --- /dev/null +++ b/simplex-glpk/src/kernel.cpp @@ -0,0 +1,263 @@ +#include +#include +#include "kernel.hpp" + +/* +template +SYCL_EXTERNAL bool checkOptimality(device_ptr C, int size) { + bool isOptimal = false; + int positveValueCount = 0; + //check if the coefficients of the objective function are negative + for(int i=0; i= 0){ + positveValueCount++; + } + } + //if all the constraints are positive now,the table is optimal + if(positveValueCount == size){ + isOptimal = true; + } + return isOptimal; +} + + +template +SYCL_EXTERNAL int findPivotColumn(device_ptr C, int size) { + int location = 0; + float minimum = C[0]; + for(int i=1; i +SYCL_EXTERNAL int findPivotRow(device_ptr A, device_ptr B, device_ptr C, int pivotColumn, int rows, int cols, bool *isUnbounded) { + int negativeValueCount = 0; + for (int i = 0; i < rows; i++) { + // 2d to 1d array index mapping + int pivotColumnIndex = (i*cols)+pivotColumn; + if (A[pivotColumnIndex] <= 0) { + negativeValueCount += 1; + } + } + int location = 0; + //checking the unbound condition if all the values are negative ones + if (negativeValueCount == rows) { + *isUnbounded = true; + } else { + float minimum = 99999999.0; + + for (int i = 0; i < rows; ++i) { + // 2d to 1d array index mapping + int pivotColumnIndex = (i*cols)+pivotColumn; + float tmpACols = A[pivotColumnIndex]; + if (tmpACols > 0) { + float result = B[i] / tmpACols; + if (result > 0 && result < minimum) { + minimum = result; + location = i; + } + } + } + } + return location; +} + +template +SYCL_EXTERNAL void doPivotting(device_ptr A, device_ptr B, device_ptr C, int pivotRow, int pivotColumn, int rows, int cols) { + int columnIndex = (pivotRow*cols)+pivotColumn; + float pivetValue = A[columnIndex]; + + float pivotRowVals[6]; //the column with the pivot + float pivotColVals[3]; //the row with the pivot + float rowNew[6]; //the row after processing the pivot value + + float maximum = 0; + maximum = maximum-(C[pivotColumn]*(B[pivotRow]/pivetValue)); //set the maximum step by step + + + //get the row that has the pivot value + for (int i = 0; i < cols; ++i) { + int pivotRowIndex = (pivotRow*cols)+i; + pivotRowVals[i] = A[pivotRowIndex]; + } + //get the column that has the pivot value + for (int j = 0; j < rows; ++j) { + int pivotColIndex = (j*cols)+pivotColumn; + pivotColVals[j] = A[pivotColIndex]; + } + + //set the row values that has the pivot value divided by the pivot value and put into new row + for (int k = 0; k < cols; ++k) { + rowNew[k] = pivotRowVals[k]/pivetValue; + } + + B[pivotRow] = B[pivotRow]/pivetValue; + + //process the other coefficients in the A array by subtracting + for (int m=0; m < rows; ++m) { + //ignore the pivot row as we already calculated that + if (m != pivotRow) { + for (int p = 0; p &inAHost, std::vector &inBHost, + std::vector &inCHost, std::vector& resultFlags) { + + int rowSizeA = inBHost.size(); + int colSizeA = inCHost.size(); + + T *inADevice = malloc_device (inAHost.size(), q); + T *inBDevice = malloc_device (inBHost.size(), q); + T *inCDevice = malloc_device (inCHost.size(), q); + int *inResultFlagsDevice = malloc_device (resultFlags.size(), q); + + if (inADevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inADevice'\n"; + std::terminate(); + } + if (inBDevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inBDevice'\n"; + std::terminate(); + } + if (inCDevice == nullptr) { + std::cerr << "ERROR: failed to allocate space for 'inCDevice'\n"; + std::terminate(); + } + + auto start = std::chrono::high_resolution_clock::now(); + + q.memcpy(inADevice, inAHost.data(), inAHost.size()*sizeof(T)).wait(); + q.memcpy(inBDevice, inBHost.data(), inBHost.size() * sizeof(T)).wait(); + q.memcpy(inCDevice, inCHost.data(), inCHost.size() * sizeof(T)).wait(); + q.memcpy(inResultFlagsDevice, resultFlags.data(), resultFlags.size()*sizeof(int)).wait(); + + q.submit([&](handler &h) { + h.single_task < SimplexCalc > ([=]()[[intel::kernel_args_restrict]] { + device_ptr inA(inADevice); + device_ptr inB(inBDevice); + device_ptr inC(inCDevice); + device_ptr inResultFlags(inResultFlagsDevice); + + bool tempIsOptimizal = checkOptimality(inC, colSizeA); + if (tempIsOptimizal) { + inResultFlags[0] = 1; + return; + } else { + inResultFlags[0] = 0; + } + + int pivotColumn = findPivotColumn(inC, colSizeA); + inResultFlags[1] = pivotColumn; + + inA[0] = 43.0; //test only + bool isUnbounded = true; + int pivotRow = findPivotRow(inA, inB, inC, pivotColumn, rowSizeA, colSizeA, &isUnbounded); + inResultFlags[2] = pivotRow; + }); + }).wait(); + + q.memcpy(inAHost.data(), inADevice, inAHost.size()*sizeof(T)).wait(); + q.memcpy(inBHost.data(), inBDevice, inBHost.size()*sizeof(T)).wait(); + q.memcpy(inCHost.data(), inCDevice, inCHost.size()*sizeof(T)).wait(); + q.memcpy(resultFlags.data(), inResultFlagsDevice, resultFlags.size()*sizeof(int)).wait(); + + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = end - start; + + sycl::free(inADevice, q); + sycl::free(inBDevice, q); + sycl::free(inCDevice, q); + sycl::free(inResultFlagsDevice, q); + + return diff.count(); +} +*/ + + +// Forward declare the kernel names in the global scope. This FPGA best practice +// reduces compiler name mangling in the optimization reports. +class SimplexCalc; + +double RunKernel(queue &q, std::vector &inAHost, std::vector &inBHost, + std::vector &inCHost, std::vector& resultFlags) { + + constexpr int value = 100000; + size_t itemSize = 1000; + range numItems{itemSize}; + + + int *parallel = malloc_shared(itemSize, q); + if (parallel == nullptr) { + sycl::free(parallel, q); + std::cout << "Shared memory allocation failure.\n"; + return -1; + } + + auto start = std::chrono::high_resolution_clock::now(); + + // Use parallel_for to populate consecutive numbers starting with a specified + // value in parallel on device. This executes the kernel. + // 1st parameter is the number of work items to use. + // 2nd parameter is the kernel, a lambda that specifies what to do per + // work item. The parameter of the lambda is the work item id. + // SYCL supports unnamed lambda kernel by default. + auto e = q.parallel_for(numItems, [=](auto i) { + parallel[i] = value + i; + }); + + // q.parallel_for() is an asynchronous call. SYCL runtime enqueues and runs + // the kernel asynchronously. Wait for the asynchronous call to complete. + e.wait(); + + + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = end - start; + + sycl::free(parallel, q); + + + return diff.count(); +} -- cgit v1.2.1