author    Pasha <pasha@member.fsf.org>    2023-01-27 00:54:07 +0000
committer Pasha <pasha@member.fsf.org>    2023-01-27 00:54:07 +0000
commit    ef800d4ffafdbde7d7a172ad73bd984b1695c138 (patch)
tree      920cc189130f1e98f252283fce94851443641a6d /simplex-glpk/src/kernel.cpp
parent    ec4ae3c2b5cb0e83fb667f14f832ea94f68ef075 (diff)
simplex-glpk with modified glpk for fpga (HEAD, master)
Diffstat (limited to 'simplex-glpk/src/kernel.cpp')
-rw-r--r--   simplex-glpk/src/kernel.cpp   263
1 file changed, 263 insertions, 0 deletions
diff --git a/simplex-glpk/src/kernel.cpp b/simplex-glpk/src/kernel.cpp
new file mode 100644
index 0000000..27e743a
--- /dev/null
+++ b/simplex-glpk/src/kernel.cpp
@@ -0,0 +1,263 @@
+#include <sycl/ext/intel/fpga_extensions.hpp>
+#include <chrono>
+#include <iostream>
+#include "kernel.hpp"
+
+/*
+template<typename T>
+SYCL_EXTERNAL bool checkOptimality(device_ptr<T> C, int size) {
+ bool isOptimal = false;
+ int positiveValueCount = 0;
+ //count the objective-function coefficients that are non-negative
+ for(int i=0; i<size; i++){
+ float value = C[i];
+ if(value >= 0){
+ positiveValueCount++;
+ }
+ }
+ //if every coefficient is non-negative, the tableau is already optimal
+ if(positiveValueCount == size){
+ isOptimal = true;
+ }
+ return isOptimal;
+}
+
+
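+//find the pivot column: the index of the most negative objective coefficient in C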
+template<typename T>
+SYCL_EXTERNAL int findPivotColumn(device_ptr<T> C, int size) {
+ int location = 0;
+ float minimum = C[0];
+ for(int i=1; i<size; ++i) {
+ if(C[i]<minimum) {
+ minimum = C[i];
+ location = i;
+ }
+ }
+ return location;
+}
+
+
+//find the pivot row: the row with the smallest positive ratio B[i] / A[i][pivotColumn]
+template<typename T>
+SYCL_EXTERNAL int findPivotRow(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotColumn, int rows, int cols, bool *isUnbounded) {
+ int nonPositiveValueCount = 0;
+ for (int i = 0; i < rows; i++) {
+ // 2d to 1d array index mapping
+ int pivotColumnIndex = (i*cols)+pivotColumn;
+ if (A[pivotColumnIndex] <= 0) {
+ nonPositiveValueCount += 1;
+ }
+ }
+ int location = 0;
+ //if no entry in the pivot column is positive, the problem is unbounded
+ if (nonPositiveValueCount == rows) {
+ *isUnbounded = true;
+ } else {
+ float minimum = 99999999.0;
+
+ for (int i = 0; i < rows; ++i) {
+ // 2d to 1d array index mapping
+ int pivotColumnIndex = (i*cols)+pivotColumn;
+ float tmpACols = A[pivotColumnIndex];
+ if (tmpACols > 0) {
+ float result = B[i] / tmpACols;
+ if (result > 0 && result < minimum) {
+ minimum = result;
+ location = i;
+ }
+ }
+ }
+ }
+ return location;
+}
+
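+//perform one simplex pivot: divide the pivot row by the pivot value, then
+//eliminate the pivot column from the other rows of A and update B and C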
+template<typename T>
+SYCL_EXTERNAL void doPivotting(device_ptr<T> A, device_ptr<T> B, device_ptr<T> C, int pivotRow, int pivotColumn, int rows, int cols) {
+ int columnIndex = (pivotRow*cols)+pivotColumn;
+ float pivotValue = A[columnIndex];
+
+ float pivotRowVals[6]; //the row containing the pivot (assumes cols <= 6)
+ float pivotColVals[3]; //the column containing the pivot (assumes rows <= 3)
+ float rowNew[6]; //the pivot row after division by the pivot value
+
+ float maximum = 0;
+ maximum = maximum-(C[pivotColumn]*(B[pivotRow]/pivotValue)); //accumulate the objective-value increase step by step
+
+
+ //get the row that has the pivot value
+ for (int i = 0; i < cols; ++i) {
+ int pivotRowIndex = (pivotRow*cols)+i;
+ pivotRowVals[i] = A[pivotRowIndex];
+ }
+ //get the column that has the pivot value
+ for (int j = 0; j < rows; ++j) {
+ int pivotColIndex = (j*cols)+pivotColumn;
+ pivotColVals[j] = A[pivotColIndex];
+ }
+
+ //divide the pivot row by the pivot value to form the new pivot row
+ for (int k = 0; k < cols; ++k) {
+ rowNew[k] = pivotRowVals[k]/pivotValue;
+ }
+
+ B[pivotRow] = B[pivotRow]/pivotValue;
+
+ //process the other coefficients in the A array by subtracting
+ for (int m=0; m < rows; ++m) {
+ //ignore the pivot row as we already calculated that
+ if (m != pivotRow) {
+ for (int p = 0; p<cols; ++p) {
+ float multiplyValue = pivotColVals[m];
+ int indexA_M_P = (m*cols)+p;
+ A[indexA_M_P] = A[indexA_M_P] - (multiplyValue * rowNew[p]);
+ //C[p] = C[p] - (multiplyValue*C[pivotRow]);
+ //B[i] = B[i] - (multiplyValue*B[pivotRow]);
+ }
+
+ }
+ }
+
+ //process the values of the B array
+ for (int i = 0; i<rows; ++i) { // rows = B.size()
+ if (i != pivotRow) {
+ float multiplyValue = pivotColVals[i];
+ B[i] = B[i]-(multiplyValue*B[pivotRow]);
+
+ }
+ }
+ //the objective-function coefficient in the pivot column
+ float multiplyValue = C[pivotColumn];
+ //update the C array (cols entries)
+ for (int i = 0; i < cols; i++) {
+ C[i] = C[i]-(multiplyValue * rowNew[i]);
+
+ }
+
+ //replacing the pivot row in the new calculated A array
+ for (int i = 0; i<cols; ++i) {
+ int indexA_pivotRow_i = (pivotRow*cols)+i;
+ A[indexA_pivotRow_i] = rowNew[i];
+ }
+}
+
+// Forward declare the kernel names in the global scope. This FPGA best practice
+// reduces compiler name mangling in the optimization reports.
+class SimplexCalc;
+
+double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost,
+ std::vector<T> &inCHost, std::vector<int>& resultFlags) {
+
+ int rowSizeA = inBHost.size();
+ int colSizeA = inCHost.size();
+
+ T *inADevice = malloc_device<T> (inAHost.size(), q);
+ T *inBDevice = malloc_device<T> (inBHost.size(), q);
+ T *inCDevice = malloc_device<T> (inCHost.size(), q);
+ int *inResultFlagsDevice = malloc_device<int> (resultFlags.size(), q);
+
+ if (inADevice == nullptr) {
+ std::cerr << "ERROR: failed to allocate space for 'inADevice'\n";
+ std::terminate();
+ }
+ if (inBDevice == nullptr) {
+ std::cerr << "ERROR: failed to allocate space for 'inBDevice'\n";
+ std::terminate();
+ }
+ if (inCDevice == nullptr) {
+ std::cerr << "ERROR: failed to allocate space for 'inCDevice'\n";
+ std::terminate();
+ }
+ if (inResultFlagsDevice == nullptr) {
+ std::cerr << "ERROR: failed to allocate space for 'inResultFlagsDevice'\n";
+ std::terminate();
+ }
+
+ auto start = std::chrono::high_resolution_clock::now();
+
+ q.memcpy(inADevice, inAHost.data(), inAHost.size()*sizeof(T)).wait();
+ q.memcpy(inBDevice, inBHost.data(), inBHost.size() * sizeof(T)).wait();
+ q.memcpy(inCDevice, inCHost.data(), inCHost.size() * sizeof(T)).wait();
+ q.memcpy(inResultFlagsDevice, resultFlags.data(), resultFlags.size()*sizeof(int)).wait();
+
+ q.submit([&](handler &h) {
+ h.single_task<SimplexCalc>([=]() [[intel::kernel_args_restrict]] {
+ device_ptr<T> inA(inADevice);
+ device_ptr<T> inB(inBDevice);
+ device_ptr<T> inC(inCDevice);
+ device_ptr<int> inResultFlags(inResultFlagsDevice);
+
+ bool tempIsOptimal = checkOptimality(inC, colSizeA);
+ if (tempIsOptimal) {
+ inResultFlags[0] = 1;
+ return;
+ } else {
+ inResultFlags[0] = 0;
+ }
+
+ int pivotColumn = findPivotColumn(inC, colSizeA);
+ inResultFlags[1] = pivotColumn;
+
+ inA[0] = 43.0; //test only
+ bool isUnbounded = false; //findPivotRow sets this to true when the pivot column has no positive entry
+ int pivotRow = findPivotRow(inA, inB, inC, pivotColumn, rowSizeA, colSizeA, &isUnbounded);
+ inResultFlags[2] = pivotRow;
+ });
+ }).wait();
+
+ q.memcpy(inAHost.data(), inADevice, inAHost.size()*sizeof(T)).wait();
+ q.memcpy(inBHost.data(), inBDevice, inBHost.size()*sizeof(T)).wait();
+ q.memcpy(inCHost.data(), inCDevice, inCHost.size()*sizeof(T)).wait();
+ q.memcpy(resultFlags.data(), inResultFlagsDevice, resultFlags.size()*sizeof(int)).wait();
+
+ auto end = std::chrono::high_resolution_clock::now();
+ std::chrono::duration<double, std::milli> diff = end - start;
+
+ sycl::free(inADevice, q);
+ sycl::free(inBDevice, q);
+ sycl::free(inCDevice, q);
+ sycl::free(inResultFlagsDevice, q);
+
+ return diff.count();
+}
+*/
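+// Note on the commented-out doPivotting above: it performs the standard
+// simplex pivot. In outline, with pivot row r, pivot column c and pivot
+// value p = A[r][c]:
+//   row_r <- row_r / p and B[r] <- B[r] / p;
+//   for every other row i: row_i <- row_i - A[i][c] * row_r and
+//   B[i] <- B[i] - A[i][c] * B[r]; the objective row C is reduced the same
+//   way using C[c] as the multiplier.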
+
+
+// Forward declare the kernel names in the global scope. This FPGA best practice
+// reduces compiler name mangling in the optimization reports.
+class SimplexCalc;
+
+double RunKernel(queue &q, std::vector<T> &inAHost, std::vector<T> &inBHost,
+ std::vector<T> &inCHost, std::vector<int>& resultFlags) {
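+ // NOTE: the simplex inputs (inAHost, inBHost, inCHost, resultFlags) are not
+ // used here; this version only exercises USM shared allocation and a simple
+ // parallel_for that fills 'parallel' with consecutive integers on the device.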
+
+ constexpr int value = 100000;
+ size_t itemSize = 1000;
+ range numItems{itemSize};
+
+
+ int *parallel = malloc_shared<int>(itemSize, q);
+ if (parallel == nullptr) {
+ std::cerr << "ERROR: failed to allocate shared memory for 'parallel'.\n";
+ return -1;
+ }
+
+ auto start = std::chrono::high_resolution_clock::now();
+
+ // Use parallel_for to populate 'parallel' with consecutive numbers starting
+ // at 'value', in parallel on the device. This launches the kernel.
+ // The 1st parameter is the number of work items to use.
+ // The 2nd parameter is the kernel, a lambda that specifies what to do per
+ // work item; the lambda's parameter is the work-item id.
+ // SYCL supports unnamed lambda kernels by default.
+ auto e = q.parallel_for(numItems, [=](auto i) {
+ parallel[i] = value + i;
+ });
+
+ // q.parallel_for() is an asynchronous call. SYCL runtime enqueues and runs
+ // the kernel asynchronously. Wait for the asynchronous call to complete.
+ e.wait();
+
+
+ auto end = std::chrono::high_resolution_clock::now();
+ std::chrono::duration<double, std::milli> diff = end - start;
+
+ sycl::free(parallel, q);
+
+
+ return diff.count();
+}
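+
+// A minimal host-side usage sketch (hypothetical caller, not part of this
+// commit). It assumes T is float, as in the commented-out kernel above, a
+// recent oneAPI release providing fpga_emulator_selector_v, and a 3-entry
+// result-flags vector matching the flags written by the commented-out kernel:
+//
+//   #include "kernel.hpp"
+//   int main() {
+//     sycl::queue q{sycl::ext::intel::fpga_emulator_selector_v};
+//     std::vector<T> A = {1, 1, 1, 0,    // 2x4 constraint matrix, flattened row-major
+//                         2, 1, 0, 1};
+//     std::vector<T> B = {12, 16};       // right-hand sides (rows = B.size())
+//     std::vector<T> C = {-6, -5, 0, 0}; // objective coefficients (cols = C.size())
+//     std::vector<int> flags(3, 0);      // optimality flag, pivot column, pivot row
+//     double ms = RunKernel(q, A, B, C, flags);
+//     std::cout << "RunKernel took " << ms << " ms\n";
+//     return 0;
+//   }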