HPC Codes

The document contains multiple code implementations using OpenMP and CUDA for various algorithms including BFS, DFS, bubble sort, merge sort, and vector addition. Each section provides code snippets along with example outputs and instructions for compiling and running the programs. Additionally, it includes installation commands for the necessary CUDA toolkit.


Code to implement BFS using OpenMP:

#include <iostream>
#include <stdlib.h>
#include <queue>
#include <omp.h>
using namespace std;

class node
{
public:
    node *left, *right;
    int data;
};

class Breadthfs
{
public:
    node *insert(node *, int);
    void bfs(node *);
};

node *insert(node *root, int data)
// inserts a node into the first empty child slot found in level order
{
    if(!root)
    {
        root = new node;
        root->left = NULL;
        root->right = NULL;
        root->data = data;
        return root;
    }

    queue<node *> q;
    q.push(root);

    while(!q.empty())
    {
        node *temp = q.front();
        q.pop();

        if(temp->left == NULL)
        {
            temp->left = new node;
            temp->left->left = NULL;
            temp->left->right = NULL;
            temp->left->data = data;
            return root;
        }
        else
        {
            q.push(temp->left);
        }

        if(temp->right == NULL)
        {
            temp->right = new node;
            temp->right->left = NULL;
            temp->right->right = NULL;
            temp->right->data = data;
            return root;
        }
        else
        {
            q.push(temp->right);
        }
    }
    return root;
}

void bfs(node *head)
// level-order traversal; each level is processed by a parallel for loop
{
    queue<node *> q;
    q.push(head);

    int qSize;

    while(!q.empty())
    {
        qSize = q.size();
        #pragma omp parallel for   // creates parallel threads for the current level
        for(int i = 0; i < qSize; i++)
        {
            node *currNode;
            #pragma omp critical   // only one thread at a time may pop the shared queue
            {
                currNode = q.front();
                q.pop();
                cout << "\t" << currNode->data;   // print the current node
            }
            #pragma omp critical   // push the node's children for the next level
            {
                if(currNode->left)
                    q.push(currNode->left);
                if(currNode->right)
                    q.push(currNode->right);
            }
        }
    }
}

int main()
{
    node *root = NULL;
    int data;
    char ans;

    do
    {
        cout << "\n enter data=>";
        cin >> data;

        root = insert(root, data);

        cout << "do you want to insert one more node?";
        cin >> ans;

    } while(ans == 'y' || ans == 'Y');

    bfs(root);

    return 0;
}

Run Commands:
1) g++ -fopenmp bfs.cpp -o bfs
2) ./bfs

Output:

Enter data => 5
Do you want to insert one more node? (y/n) y
Enter data => 3
Do you want to insert one more node? (y/n) y
Enter data => 2
Do you want to insert one more node? (y/n) y
Enter data => 1
Do you want to insert one more node? (y/n) y
Enter data => 7
Do you want to insert one more node? (y/n) y
Enter data => 8
Do you want to insert one more node? (y/n) n
5   3   7   2   1   8
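
Because both critical sections above serialize access to the shared queue, a level-synchronous variant is a common alternative. The following is a minimal sketch (not part of the original program; bfs_levels is a hypothetical name) that could be dropped into the same file, with #include <vector> added, so that each level sits in a vector the parallel loop can index directly:

void bfs_levels(node *head)
{
    vector<node *> level;
    if(head) level.push_back(head);

    while(!level.empty())
    {
        vector<node *> next;
        #pragma omp parallel for
        for(int i = 0; i < (int)level.size(); i++)
        {
            node *curr = level[i];
            #pragma omp critical   // cout and the shared next-level vector still need protection
            {
                cout << "\t" << curr->data;
                if(curr->left)  next.push_back(curr->left);
                if(curr->right) next.push_back(curr->right);
            }
        }
        level = next;   // move on to the next level
    }
}
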
Code to implement DFS using OpenMP:

#include <iostream>
#include <vector>
#include <stack>
#include <omp.h>

using namespace std;

const int MAX = 100000;

vector<int> graph[MAX];
bool visited[MAX];

void dfs(int node)
{
    stack<int> s;
    s.push(node);

    while (!s.empty())
    {
        int curr_node = s.top();
        s.pop();

        if (!visited[curr_node])
        {
            visited[curr_node] = true;
            cout << curr_node << " ";

            #pragma omp parallel for
            for (int i = 0; i < (int)graph[curr_node].size(); i++)
            {
                int adj_node = graph[curr_node][i];
                if (!visited[adj_node])
                {
                    #pragma omp critical   // only one thread at a time may push the shared stack
                    s.push(adj_node);
                }
            }
        }
    }
}

int main()
{
    int n, m, start_node;
    cout << "Enter No of Nodes, Edges, and start node: ";
    cin >> n >> m >> start_node;
    // n: nodes, m: edges

    cout << "Enter Pairs of edges: ";
    for (int i = 0; i < m; i++)
    {
        int u, v;
        cin >> u >> v;
        // u and v: endpoints of an edge
        graph[u].push_back(v);
        graph[v].push_back(u);
    }

    #pragma omp parallel for
    for (int i = 0; i < n; i++)
    {
        visited[i] = false;
    }

    dfs(start_node);

    /* for (int i = 0; i < n; i++)
    {
        if (visited[i])
        {
            cout << i << " ";
        }
    } */
    return 0;
}
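
Run Commands (assuming the file is saved as dfs.cpp):
1) g++ -fopenmp dfs.cpp -o dfs
2) ./dfs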

Output:
Code to Implement parallel bubble sort using OpenMP:

#include <iostream>
#include <stdlib.h>
#include <omp.h>
using namespace std;

void bubble(int *, int);    // pointer and size as arguments
void swap(int &, int &);    // references as parameters

void bubble(int *a, int n)  // a: array of integers, n: size of the array
{
    // odd-even transposition sort: alternate passes over even and odd pairs
    for(int i = 0; i < n; i++)
    {
        int first = i % 2;

        #pragma omp parallel for shared(a, first)
        for(int j = first; j < n - 1; j += 2)
        {
            if(a[j] > a[j + 1])
            {
                swap(a[j], a[j + 1]);
            }
        }
    }
}

void swap(int &a, int &b)
{
    int test;
    test = a;
    a = b;
    b = test;
}

int main()
{
    int *a, n;
    cout << "\n Enter total no of elements=>";
    cin >> n;
    a = new int[n];
    cout << "\n Enter elements=>";
    for(int i = 0; i < n; i++)
    {
        cin >> a[i];
    }

    bubble(a, n);

    cout << "\n Sorted array is=>";
    for(int i = 0; i < n; i++)
    {
        cout << a[i] << endl;
    }

    return 0;
}
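
Run Commands (assuming the file is saved as bubble.cpp):
1) g++ -fopenmp bubble.cpp -o bubble
2) ./bubble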

Output:

Enter total no of elements=> 6

Enter elements=> 4
6
3
1
2
5
Sorted array is=> 1
2
3
4
5
6
Code to Implement parallel Merge sort using OpenMP:

#include <iostream>
#include <stdlib.h>
#include <omp.h>
using namespace std;

void mergesort(int a[], int i, int j);
void merge(int a[], int i1, int j1, int i2, int j2);

void mergesort(int a[], int i, int j)
{
    int mid;
    if(i < j)
    {
        mid = (i + j) / 2;

        #pragma omp parallel sections   // sort the two halves in parallel
        {
            #pragma omp section
            {
                mergesort(a, i, mid);
            }

            #pragma omp section
            {
                mergesort(a, mid + 1, j);
            }
        }

        merge(a, i, mid, mid + 1, j);
    }
}

void merge(int a[], int i1, int j1, int i2, int j2)
{
    int temp[1000];
    int i, j, k;
    i = i1;
    j = i2;
    k = 0;

    while(i <= j1 && j <= j2)   // copy the smaller of the two front elements
    {
        if(a[i] < a[j])
        {
            temp[k++] = a[i++];
        }
        else
        {
            temp[k++] = a[j++];
        }
    }

    while(i <= j1)   // copy any remaining elements of the first half
    {
        temp[k++] = a[i++];
    }

    while(j <= j2)   // copy any remaining elements of the second half
    {
        temp[k++] = a[j++];
    }

    for(i = i1, j = 0; i <= j2; i++, j++)
    {
        a[i] = temp[j];
    }
}

int main()
{
    int *a, n, i;
    cout << "\n enter total no of elements=>";
    cin >> n;
    a = new int[n];
    cout << "\n enter elements=>";
    for(i = 0; i < n; i++)
    {
        cin >> a[i];
    }
//  start = .......
//  #pragma omp .....
    mergesort(a, 0, n - 1);
//  stop = .......
    cout << "\n sorted array is=>";
    for(i = 0; i < n; i++)
    {
        cout << "\n" << a[i];
    }
    // cout << stop - start
    return 0;
}
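
One way to fill in the commented timing placeholders above is with omp_get_wtime(), which returns wall-clock time in seconds. A minimal sketch (the names start and stop are illustrative, not from the original):

    double start = omp_get_wtime();   // record wall-clock time before sorting
    mergesort(a, 0, n - 1);
    double stop = omp_get_wtime();    // record wall-clock time after sorting
    cout << "\n time taken: " << (stop - start) << " seconds";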

Output:

enter total no of elements=>5

enter elements=>3
6
5
2
4

sorted array is=>


2
3
4
5
6
Code to Implement Min, Max, Sum and Average operations using Parallel Reduction:

#include <iostream>
//#include <vector>
#include <omp.h>
#include <climits>
using namespace std;

void min_reduction(int arr[], int n) {
    int min_value = INT_MAX;
    #pragma omp parallel for reduction(min: min_value)
    for (int i = 0; i < n; i++) {
        if (arr[i] < min_value) {
            min_value = arr[i];
        }
    }
    cout << "Minimum value: " << min_value << endl;
}

void max_reduction(int arr[], int n) {
    int max_value = INT_MIN;
    #pragma omp parallel for reduction(max: max_value)
    for (int i = 0; i < n; i++) {
        if (arr[i] > max_value) {
            max_value = arr[i];
        }
    }
    cout << "Maximum value: " << max_value << endl;
}

void sum_reduction(int arr[], int n) {
    int sum = 0;
    #pragma omp parallel for reduction(+: sum)
    for (int i = 0; i < n; i++) {
        sum += arr[i];
    }
    cout << "Sum: " << sum << endl;
}

void average_reduction(int arr[], int n) {
    int sum = 0;
    #pragma omp parallel for reduction(+: sum)
    for (int i = 0; i < n; i++) {
        sum += arr[i];
    }
    cout << "Average: " << (double)sum / n << endl;
}

int main() {
    int *arr, n;
    cout << "\n Enter total no of elements=>";
    cin >> n;
    arr = new int[n];
    cout << "\n Enter elements=>";
    for (int i = 0; i < n; i++) {
        cin >> arr[i];
    }

    // int arr[] = {5, 2, 9, 1, 7, 6, 8, 3, 4};
    // int n = size(arr);

    min_reduction(arr, n);
    max_reduction(arr, n);
    sum_reduction(arr, n);
    average_reduction(arr, n);
    return 0;
}

Output:
Compile: g++ -fopenmp min.cpp -o min
Run: ./min

Enter total no of elements=>6

Enter elements=>2
8
6
9
5
4
Minimum value: 2
Maximum value: 9
Sum: 34
Average: 5.66667
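
For reference, the reduction(+: sum) clause is roughly equivalent to giving every thread its own private partial sum and combining the partials at the end, as in this hand-written sketch (illustrative only, not part of the program above; it uses the same arr and n):

    int sum = 0;
    #pragma omp parallel
    {
        int local_sum = 0;          // private partial sum for each thread
        #pragma omp for
        for (int i = 0; i < n; i++) {
            local_sum += arr[i];
        }
        #pragma omp critical        // combine the partial sums one thread at a time
        sum += local_sum;
    }
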
CUDA Program for Addition of Two Large Vectors:

#include <stdio.h>
#include <stdlib.h>

// CUDA kernel for vector addition
__global__ void vectorAdd(int *a, int *b, int *c, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}

int main() {
    int n = 1000000;            // Vector size
    int *a, *b, *c;             // Host vectors
    int *d_a, *d_b, *d_c;       // Device vectors
    int size = n * sizeof(int); // Size in bytes

    // Allocate memory for host vectors
    a = (int*) malloc(size);
    b = (int*) malloc(size);
    c = (int*) malloc(size);

    // Initialize host vectors
    for (int i = 0; i < n; i++) {
        a[i] = i;
        b[i] = i;
    }

    // Allocate memory for device vectors
    cudaMalloc((void**) &d_a, size);
    cudaMalloc((void**) &d_b, size);
    cudaMalloc((void**) &d_c, size);

    // Copy host vectors to device vectors
    cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice);

    // Define block size and grid size
    int blockSize = 256;
    int gridSize = (n + blockSize - 1) / blockSize;

    // Launch kernel
    vectorAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c, n);

    // Copy device result vector to host result vector
    cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost);

    // Verify the result
    for (int i = 0; i < n; i++) {
        if (c[i] != 2*i) {
            printf("Error: c[%d] = %d\n", i, c[i]);
            break;
        }
    }

    // Free device memory
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    // Free host memory
    free(a);
    free(b);
    free(c);
    return 0;
}
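
The program can be built and run with nvcc, for example nvcc vector_add.cu -o vector_add followed by ./vector_add (the file name is an assumption). The code above does not check whether the kernel launch succeeded; a minimal sketch of such a check, placed right after the kernel launch, could look like this:

    // Report any error from the preceding kernel launch (sketch, not in the original)
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("CUDA error: %s\n", cudaGetErrorString(err));
    }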

To Install nvcc Program:

sudo apt-get install nvidia-cuda-toolkit

sudo apt-get install libglade2-0

Again run the below command -

sudo apt-get install nvidia-cuda-toolkit

CUDA Program for Multiplication of Two Large Matrices:

#include <stdio.h>

#define BLOCK_SIZE 16

__global__ void matrix_multiply(float *a, float *b, float *c, int n)
{
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    float sum = 0;

    if (row < n && col < n)
    {
        for (int i = 0; i < n; ++i)
        {
            sum += a[row * n + i] * b[i * n + col];
        }
        c[row * n + col] = sum;
    }
}

int main()
{
    int n = 1024;
    size_t size = n * n * sizeof(float);
    float *a, *b, *c;
    float *d_a, *d_b, *d_c;
    cudaEvent_t start, stop;
    float elapsed_time;

    // Allocate host memory
    a = (float*)malloc(size);
    b = (float*)malloc(size);
    c = (float*)malloc(size);

    // Initialize matrices
    for (int i = 0; i < n * n; ++i)
    {
        a[i] = i % n;
        b[i] = i % n;
    }

    // Allocate device memory
    cudaMalloc(&d_a, size);
    cudaMalloc(&d_b, size);
    cudaMalloc(&d_c, size);

    // Copy input data to device
    cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice);

    // Set kernel launch configuration
    dim3 threads(BLOCK_SIZE, BLOCK_SIZE);
    dim3 blocks((n + threads.x - 1) / threads.x, (n + threads.y - 1) / threads.y);

    // Launch kernel and time it with CUDA events
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start);
    matrix_multiply<<<blocks, threads>>>(d_a, d_b, d_c, n);
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&elapsed_time, start, stop);

    // Copy output data to host
    cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost);

    // Print elapsed time
    printf("Elapsed time: %f ms\n", elapsed_time);

    // Free device memory
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    // Free host memory
    free(a);
    free(b);
    free(c);
    return 0;
}
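
The program reports only the elapsed time. A small host-side spot check (a sketch, not part of the original; the chosen indices are arbitrary) can recompute one element on the CPU and compare it with the GPU result, for example just before the host memory is freed:

    // Recompute a single element on the CPU and compare with the GPU result
    int row = 1, col = 2;
    float expected = 0;
    for (int i = 0; i < n; ++i) {
        expected += a[row * n + i] * b[i * n + col];
    }
    printf("c[%d][%d] = %f (expected %f)\n", row, col, c[row * n + col], expected);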
