Code to implement BFS using OpenMP:
#include<iostream>
#include<stdlib.h>
#include<queue>
using namespace std;
// A binary tree node: an integer payload plus left/right child links.
// Members are public and left uninitialized by the (implicit) default
// constructor; insert() below sets all three fields on creation.
class node
{
public:
node *left, *right;   // child pointers (NULL when a child is absent)
int data;             // payload value stored at this node
};
// Declares the tree operations as member functions.
// NOTE(review): insert() and bfs() below are defined as FREE functions,
// not as members of this class, so this class is never actually used —
// confirm whether it can be removed.
class Breadthfs
{
public:
node *insert(node *, int);   // declared here but never defined as a member
void bfs(node *);            // declared here but never defined as a member
};
// Inserts `data` at the first free child slot found in level order,
// keeping the tree as complete as possible.
// Returns the (possibly newly created) root of the tree.
// Fix: the original function had no return statement on the path where
// the while loop exits — undefined behavior for a non-void function.
node *insert(node *root, int data)
{
    if (!root)
    {
        // Empty tree: the new node becomes the root.
        root = new node;
        root->left = NULL;
        root->right = NULL;
        root->data = data;
        return root;
    }
    // Level-order scan for the first node with a free child slot.
    queue<node *> q;
    q.push(root);
    while (!q.empty())
    {
        node *temp = q.front();
        q.pop();
        if (temp->left == NULL)
        {
            temp->left = new node;
            temp->left->left = NULL;
            temp->left->right = NULL;
            temp->left->data = data;
            return root;
        }
        q.push(temp->left);
        if (temp->right == NULL)
        {
            temp->right = new node;
            temp->right->left = NULL;
            temp->right->right = NULL;
            temp->right->data = data;
            return root;
        }
        q.push(temp->right);
    }
    return root;   // not reachable in practice, but required for well-defined behavior
}
// Level-order (BFS) traversal of the binary tree rooted at `head`,
// printing each node's data. Nodes of one level are processed by
// parallel threads; all std::queue operations and printing are
// serialized inside a critical section because std::queue is not
// thread-safe. Print order within a level is therefore nondeterministic.
// Fix: the original function was missing its closing brace, which
// nested main() inside bfs() and broke compilation.
void bfs(node *head)
{
    if (!head)
        return;            // nothing to traverse
    queue<node *> q;
    q.push(head);
    while (!q.empty())
    {
        int qSize = q.size();   // number of nodes on the current level
        #pragma omp parallel for
        for (int i = 0; i < qSize; i++)
        {
            node *currNode;
            #pragma omp critical
            {
                currNode = q.front();
                q.pop();
                cout << "\t" << currNode->data;   // print current node
                if (currNode->left)               // enqueue children for the next level
                    q.push(currNode->left);
                if (currNode->right)
                    q.push(currNode->right);
            }
        }
    }
}
// Interactively builds a binary tree from user input, then prints
// it in breadth-first order.
int main()
{
    node *root = NULL;
    char answer;
    do
    {
        int value;
        cout << "\n enter data=>";
        cin >> value;
        root = insert(root, value);
        cout << "do you want insert one more node?";
        cin >> answer;
    } while (answer == 'y' || answer == 'Y');
    bfs(root);
    return 0;
}
Run Commands:
1) g++ -fopenmp bfs.cpp -o bfs
2) ./bfs
Output:
Enter data => 5
Do you want to insert one more node? (y/n) y
Enter data => 3
Do you want to insert one more node? (y/n) y
Enter data => 2
Do you want to insert one more node? (y/n) y
Enter data => 1
Do you want to insert one more node? (y/n) y
Enter data => 7
Do you want to insert one more node? (y/n) y
Enter data => 8
Do you want to insert one more node? (y/n) n
5 3 7 2 1 8
Code to implement DFS using OpenMP:
#include<iostream>
#include <vector>
#include <stack>
#include <omp.h>
using namespace std;
const int MAX = 100000;     // maximum number of vertices supported
vector<int> graph[MAX];     // adjacency lists (undirected edges added twice)
bool visited[MAX];          // per-vertex visited flag

// Iterative depth-first traversal starting from `node`, printing each
// vertex the first time it is taken off the stack. Neighbour scanning
// is parallelized; pushes onto the shared stack are serialized because
// std::stack is not thread-safe.
// Fixes: the original pushed to the shared stack from parallel threads
// with no synchronization (data race), and re-tested visited[curr_node]
// immediately after setting it true (always-true branch).
void dfs(int node)
{
    stack<int> s;
    s.push(node);
    while (!s.empty())
    {
        int curr_node = s.top();
        s.pop();
        if (!visited[curr_node])
        {
            visited[curr_node] = true;
            cout << curr_node << " ";
            #pragma omp parallel for
            for (int i = 0; i < (int)graph[curr_node].size(); i++)
            {
                int adj_node = graph[curr_node][i];
                if (!visited[adj_node])
                {
                    #pragma omp critical   // std::stack is not thread-safe
                    s.push(adj_node);
                }
            }
        }
    }
}
// Reads an undirected graph (n nodes, m edges) and runs a DFS from the
// chosen start node.
// Fix: the original contained "#pragma omp parallel" followed by
// "for for (int i ...)" — a syntax error; the intended construct is a
// single "#pragma omp parallel for" over the initialization loop.
int main()
{
    int n, m, start_node;
    cout << "Enter No of Node, Edges, and start node:";
    cin >> n >> m >> start_node;
    // n: nodes, m: edges
    cout << "Enter Pair of edges:";
    for (int i = 0; i < m; i++)
    {
        int u, v;
        cin >> u >> v;
        // u and v: endpoints of an undirected edge
        graph[u].push_back(v);
        graph[v].push_back(u);
    }
    // Clear visited flags in parallel before the traversal.
    #pragma omp parallel for
    for (int i = 0; i < n; i++)
    {
        visited[i] = false;
    }
    dfs(start_node);
    return 0;
}
Output:
Code to Implement parallel bubble sort using OpenMP:
#include<iostream>
#include<stdlib.h>
#include<omp.h>
using namespace std;
void bubble(int *, int); // Pointer and Argument
void swap(int &, int &); // References as parameter
// Parallel odd-even transposition sort of the n-element array a.
// Each of the n phases alternates between even-indexed pairs
// (0,1),(2,3),... and odd-indexed pairs (1,2),(3,4),...; the
// compare-exchanges within one phase touch disjoint pairs and so
// run safely in parallel.
void bubble(int *a, int n)
{
    for (int phase = 0; phase < n; phase++)
    {
        int start = phase % 2;   // 0 -> even pairs, 1 -> odd pairs
        #pragma omp parallel for shared(a, start)
        for (int j = start; j + 1 < n; j += 2)
        {
            if (a[j] > a[j + 1])
                swap(a[j], a[j + 1]);
        }
    }
}
// Exchanges the values of a and b in place.
// NOTE: this non-template overload is preferred over std::swap at
// unqualified call sites for int arguments.
// Fix: the original definition was missing its closing brace, which
// nested main() inside swap() and broke compilation.
void swap(int &a, int &b)
{
    int test = a;
    a = b;
    b = test;
}
// Reads n integers, sorts them with the parallel odd-even bubble sort,
// and prints the sorted sequence.
// Fixes: the original leaked the heap buffer (no delete[]) and did not
// guard against a non-positive element count.
int main()
{
    int n;
    cout << "\n Enter total no of elements=>";
    cin >> n;
    if (n <= 0)
        return 0;   // nothing to sort; also avoids new int[negative]
    int *a = new int[n];
    cout << "\n Enter elements=>";
    for (int i = 0; i < n; i++)
    {
        cin >> a[i];
    }
    bubble(a, n);
    cout << "\n Sorted array is=>";
    for (int i = 0; i < n; i++)
    {
        cout << a[i] << endl;
    }
    delete[] a;   // fix: original never freed this buffer
    return 0;
}
Output:
Enter total no of elements=> 6
Enter elements=> 4
6
3
1
2
5
Sorted array is=> 1
2
3
4
5
6
Code to Implement parallel Merge sort using OpenMP:
#include<iostream>
#include<stdlib.h>
#include<omp.h>
using namespace std;
void mergesort(int a[],int i,int j);
void merge(int a[],int i1,int j1,int i2,int j2);
// Merges the adjacent sorted runs a[i1..j1] and a[i2..j2] (i2 == j1+1)
// back into a[i1..j2], stable with respect to equal keys.
// Fix: the original used a fixed temp[1000] buffer, overflowing for
// runs longer than 1000 elements; the scratch buffer is now sized to
// the run being merged.
void merge(int a[], int i1, int j1, int i2, int j2)
{
    int total = j2 - i1 + 1;         // length of the combined run
    int *temp = new int[total];      // scratch sized to the input, no fixed cap
    int i = i1, j = i2, k = 0;
    while (i <= j1 && j <= j2)
        temp[k++] = (a[i] < a[j]) ? a[i++] : a[j++];
    while (i <= j1)                  // drain the left run
        temp[k++] = a[i++];
    while (j <= j2)                  // drain the right run
        temp[k++] = a[j++];
    for (k = 0; k < total; k++)      // copy merged result back in place
        a[i1 + k] = temp[k];
    delete[] temp;
}

// Sorts a[i..j] (inclusive bounds) with merge sort; the two halves are
// sorted concurrently using OpenMP sections.
// Fix: the original definition was missing its closing brace, which
// nested merge() (and main()) inside mergesort() and broke compilation.
void mergesort(int a[], int i, int j)
{
    if (i < j)
    {
        int mid = i + (j - i) / 2;   // avoids overflow of (i + j)
        #pragma omp parallel sections
        {
            #pragma omp section
            {
                mergesort(a, i, mid);
            }
            #pragma omp section
            {
                mergesort(a, mid + 1, j);
            }
        }
        merge(a, i, mid, mid + 1, j);
    }
}
// Reads n integers, sorts them with the parallel merge sort, and
// prints the sorted sequence.
// Fixes: the original leaked the heap buffer (no delete[]) and did not
// guard against a non-positive element count.
int main()
{
    int n;
    cout << "\n enter total no of elements=>";
    cin >> n;
    if (n <= 0)
        return 0;   // nothing to sort; also avoids new int[negative]
    int *a = new int[n];
    cout << "\n enter elements=>";
    for (int i = 0; i < n; i++)
    {
        cin >> a[i];
    }
    mergesort(a, 0, n - 1);
    cout << "\n sorted array is=>";
    for (int i = 0; i < n; i++)
    {
        cout << "\n" << a[i];
    }
    delete[] a;   // fix: original never freed this buffer
    return 0;
}
Output:
enter total no of elements=>5
enter elements=>3
6
5
2
4
sorted array is=>
2
3
4
5
6
Code to Implement Min, Max, Sum and Average operations using Parallel Reduction:
#include <iostream>
//#include <vector>
#include <omp.h>
#include <climits>
using namespace std;
// Prints the smallest element of arr[0..n-1], computed with an OpenMP
// min-reduction across the parallel loop.
void min_reduction(int arr[], int n) {
    int min_value = INT_MAX;
    #pragma omp parallel for reduction(min: min_value)
    for (int i = 0; i < n; ++i)
        min_value = (arr[i] < min_value) ? arr[i] : min_value;
    cout << "Minimum value: " << min_value << endl;
}
// Prints the largest element of arr[0..n-1], computed with an OpenMP
// max-reduction across the parallel loop.
void max_reduction(int arr[], int n) {
    int max_value = INT_MIN;
    #pragma omp parallel for reduction(max: max_value)
    for (int i = 0; i < n; ++i)
        max_value = (arr[i] > max_value) ? arr[i] : max_value;
    cout << "Maximum value: " << max_value << endl;
}
// Prints the sum of arr[0..n-1], accumulated with an OpenMP
// +-reduction across the parallel loop.
void sum_reduction(int arr[], int n) {
    int total = 0;
    #pragma omp parallel for reduction(+: total)
    for (int i = 0; i < n; ++i)
        total += arr[i];
    cout << "Sum: " << total << endl;
}
// Prints the arithmetic mean of arr[0..n-1].
// Fix: the original divided by (n-1) instead of n, overstating the
// average (and dividing by zero for n == 1).
void average_reduction(int arr[], int n) {
    if (n <= 0) {                  // guard: mean of an empty array is undefined
        cout << "Average: 0" << endl;
        return;
    }
    int sum = 0;
    #pragma omp parallel for reduction(+: sum)
    for (int i = 0; i < n; i++) {
        sum += arr[i];
    }
    cout << "Average: " << (double)sum / n << endl;
}
// Reads n integers and reports their min, max, sum, and average using
// the parallel-reduction helpers above.
// Fixes: the original leaked the heap buffer (no delete[]) and did not
// guard against a non-positive element count.
int main() {
    int n;
    cout << "\n Enter total no of elements=>";
    cin >> n;
    if (n <= 0)
        return 0;   // nothing to compute; also avoids new int[negative]
    int *arr = new int[n];
    cout << "\n Enter elements=>";
    for (int i = 0; i < n; i++)
    {
        cin >> arr[i];
    }
    min_reduction(arr, n);
    max_reduction(arr, n);
    sum_reduction(arr, n);
    average_reduction(arr, n);
    delete[] arr;   // fix: original never freed this buffer
    return 0;
}
Output:
Compile: g++ -fopenmp min.cpp -o min
Run: ./min
Enter total no of elements=>6
Enter elements=>2
8
6
9
5
4
Minimum value: 2
Maximum value: 9
Sum: 34
Average: 6.8
CUDA Program for Addition of Two Large Vectors:
#include <stdio.h>
#include <stdlib.h>
// CUDA kernel for vector addition
// Element-wise vector addition: c[i] = a[i] + b[i] for i in [0, n).
// Expects a 1D launch with at least n total threads; threads past the
// end of the array exit immediately (grid-tail guard).
__global__ void vectorAdd(int *a, int *b, int *c, int n) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= n)
        return;   // out-of-range thread: nothing to do
    c[idx] = a[idx] + b[idx];
}
// Prints the CUDA error for `msg` and aborts when err != cudaSuccess.
static void checkCuda(cudaError_t err, const char *msg) {
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error (%s): %s\n", msg, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

// Adds two n-element integer vectors on the GPU and verifies the result
// on the host.
// Fixes: the original checked no CUDA API return codes, never checked
// for kernel-launch errors, and did not verify host malloc succeeded.
int main() {
    int n = 1000000;                      // vector length
    size_t size = n * sizeof(int);        // bytes per vector
    // Allocate and initialize host vectors (a[i] = b[i] = i).
    int *a = (int*) malloc(size);
    int *b = (int*) malloc(size);
    int *c = (int*) malloc(size);
    if (!a || !b || !c) {
        fprintf(stderr, "host allocation failed\n");
        return 1;
    }
    for (int i = 0; i < n; i++) {
        a[i] = i;
        b[i] = i;
    }
    // Allocate device vectors and copy the inputs over.
    int *d_a, *d_b, *d_c;
    checkCuda(cudaMalloc((void**) &d_a, size), "cudaMalloc d_a");
    checkCuda(cudaMalloc((void**) &d_b, size), "cudaMalloc d_b");
    checkCuda(cudaMalloc((void**) &d_c, size), "cudaMalloc d_c");
    checkCuda(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice), "copy a");
    checkCuda(cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice), "copy b");
    // Launch: ceil-div grid so every element is covered.
    int blockSize = 256;
    int gridSize = (n + blockSize - 1) / blockSize;
    vectorAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c, n);
    checkCuda(cudaGetLastError(), "kernel launch");   // catches bad launch config
    // Blocking copy back also synchronizes with the kernel.
    checkCuda(cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost), "copy c");
    // Verify on the host: c[i] must equal 2*i.
    for (int i = 0; i < n; i++) {
        if (c[i] != 2*i) {
            printf("Error: c[%d] = %d\n", i, c[i]);
            break;
        }
    }
    // Free device and host memory.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    free(a);
    free(b);
    free(c);
    return 0;
}
To install the nvcc compiler:
sudo apt-get install nvidia-cuda-toolkit
sudo apt-get install libglade2-0
If the installation fails, run the command again:
sudo apt-get install nvidia-cuda-toolkit
CUDA Program for Matrix Multiplication:
#include <stdio.h>
#define BLOCK_SIZE 16
// Naive dense matrix multiply: c = a * b for square n x n matrices in
// row-major layout. One thread computes one output element; expects a
// 2D launch covering at least n x n threads.
__global__ void matrix_multiply(float *a, float *b, float *c, int n)
{
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    if (row >= n || col >= n)
        return;   // grid-tail guard: thread outside the matrix
    float sum = 0.0f;
    for (int k = 0; k < n; ++k)
        sum += a[row * n + k] * b[k * n + col];
    c[row * n + col] = sum;
}
// Multiplies two n x n float matrices on the GPU and reports the kernel
// time measured with CUDA events.
// Fixes: the original never destroyed the cudaEvent_t handles (resource
// leak), never checked for kernel-launch errors, and did not verify
// host malloc succeeded.
int main()
{
    int n = 1024;                          // matrix dimension (n x n)
    size_t size = n * n * sizeof(float);   // bytes per matrix
    cudaEvent_t start, stop;
    float elapsed_time;
    // Allocate and initialize host matrices (each value is its column index).
    float *a = (float*)malloc(size);
    float *b = (float*)malloc(size);
    float *c = (float*)malloc(size);
    if (!a || !b || !c)
    {
        fprintf(stderr, "host allocation failed\n");
        return 1;
    }
    for (int i = 0; i < n * n; ++i)
    {
        a[i] = i % n;
        b[i] = i % n;
    }
    // Allocate device matrices and copy the inputs over.
    float *d_a, *d_b, *d_c;
    cudaMalloc(&d_a, size);
    cudaMalloc(&d_b, size);
    cudaMalloc(&d_c, size);
    cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice);
    // Launch configuration: BLOCK_SIZE x BLOCK_SIZE tiles, ceil-div grid.
    dim3 threads(BLOCK_SIZE, BLOCK_SIZE);
    dim3 blocks((n + threads.x - 1) / threads.x, (n + threads.y - 1) / threads.y);
    // Time the kernel with events recorded around the launch.
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start);
    matrix_multiply<<<blocks, threads>>>(d_a, d_b, d_c, n);
    cudaError_t err = cudaGetLastError();   // catches bad launch config
    if (err != cudaSuccess)
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(err));
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&elapsed_time, start, stop);
    // Copy the result back (blocking, so the kernel has completed).
    cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost);
    printf("Elapsed time: %f ms\n", elapsed_time);
    // Release events, device memory, and host memory.
    cudaEventDestroy(start);   // fix: original leaked both events
    cudaEventDestroy(stop);
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    free(a);
    free(b);
    free(c);
    return 0;
}