# Install CUDA toolkit (example: CUDA 11.2)
!apt-get update
!apt-get install -y cuda-toolkit-11-2
Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease
Hit:3 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' do
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package cuda-toolkit-11-2
!nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0
!nvidia-smi
Mon Apr 28 03:21:41 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |
| N/A 54C P8 10W / 70W | 0MiB / 15360MiB | 0% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
%%writefile mul.cu
#include <iostream>
#include <cuda_runtime.h>
// Integer matrix product C = A * B for N x N matrices addressed as flat,
// row-major arrays (element (r, c) lives at index r * N + c).
//
// Each thread derives one (row, col) output coordinate from its 2D block and
// thread indices (y -> row, x -> column) and computes that single element of
// C. Threads whose coordinate lies outside the N x N matrix do nothing, so the
// launch grid may overshoot N in either dimension.
//
//   A, B : input matrices, N * N ints each, read only
//   C    : output matrix, N * N ints, every in-range element is overwritten
//   N    : matrix dimension
__global__ void matmul(int *A, int *B, int *C, int N) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard clause: threads past the matrix edge have no element to produce.
    if (!(row < N && col < N)) {
        return;
    }

    // Offset of the first element of this thread's row in A and C.
    const int rowStart = row * N;

    // Dot product of row `row` of A with column `col` of B.
    int acc = 0;
    for (int k = 0; k < N; ++k) {
        const int lhs = A[rowStart + k];
        const int rhs = B[k * N + col];
        acc += lhs * rhs;
    }

    C[rowStart + col] = acc;
}
// Largest N for which N * N still fits in a 32-bit signed int
// (46340^2 = 2147395600). The matmul kernel computes element offsets with
// int arithmetic, so larger matrices would overflow its indices.
static const int kMaxMatrixDim = 46340;

// Reports a failed CUDA runtime call on stderr.
//   status : return code of the CUDA call
//   what   : short description of the operation, used in the message
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error (" << what << "): "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Reads `count` integers from standard input into `dst`.
// Returns false as soon as one extraction fails (non-numeric text or EOF),
// so the caller never works on partially filled, uninitialized storage.
static bool readMatrix(int *dst, size_t count) {
    for (size_t i = 0; i < count; i++) {
        if (!(std::cin >> dst[i])) {
            return false;
        }
    }
    return true;
}

// Interactive driver: reads N and two N x N integer matrices from stdin,
// multiplies them on the GPU with the matmul kernel, and prints the top-left
// (at most 10 x 10) corner of the product.
//
// Returns 0 on success and 1 on invalid input or on any CUDA failure.
int main() {
    int N = 0;
    std::cout << "Enter the size of the square matrices (N): ";
    if (!(std::cin >> N) || N <= 0 || N > kMaxMatrixDim) {
        std::cerr << "N must be an integer between 1 and " << kMaxMatrixDim
                  << "." << std::endl;
        return 1;
    }

    // Element and byte counts are kept in size_t so they cannot overflow int.
    const size_t count = static_cast<size_t>(N) * static_cast<size_t>(N);
    const size_t size = count * sizeof(int);

    int *A = new int[count];
    int *B = new int[count];
    int *C = new int[count];

    std::cout << "Enter elements for matrix A:" << std::endl;
    bool ok = readMatrix(A, count);
    if (ok) {
        std::cout << "Enter elements for matrix B:" << std::endl;
        ok = readMatrix(B, count);
    }
    if (!ok) {
        std::cerr << "Invalid or missing matrix element." << std::endl;
    }

    // Initialised to nullptr so the unconditional cudaFree calls below are
    // harmless when an allocation was never reached or failed.
    int *dev_A = nullptr, *dev_B = nullptr, *dev_C = nullptr;

    // Each step runs only if everything before it succeeded (short-circuit).
    ok = ok
        && cudaOk(cudaMalloc(&dev_A, size), "cudaMalloc dev_A")
        && cudaOk(cudaMalloc(&dev_B, size), "cudaMalloc dev_B")
        && cudaOk(cudaMalloc(&dev_C, size), "cudaMalloc dev_C")
        && cudaOk(cudaMemcpy(dev_A, A, size, cudaMemcpyHostToDevice),
                  "copy A to device")
        && cudaOk(cudaMemcpy(dev_B, B, size, cudaMemcpyHostToDevice),
                  "copy B to device");

    if (ok) {
        // 16 x 16 threads per block; ceil-divide so the grid covers all of N.
        dim3 dimBlock(16, 16);
        dim3 dimGrid((N + dimBlock.x - 1) / dimBlock.x,
                     (N + dimBlock.y - 1) / dimBlock.y);
        matmul<<<dimGrid, dimBlock>>>(dev_A, dev_B, dev_C, N);

        // A launch returns no status itself: cudaGetLastError reports a bad
        // launch configuration, and the synchronize reports faults raised
        // while the kernel was executing.
        ok = cudaOk(cudaGetLastError(), "matmul launch")
            && cudaOk(cudaDeviceSynchronize(), "matmul execution")
            && cudaOk(cudaMemcpy(C, dev_C, size, cudaMemcpyDeviceToHost),
                      "copy C to host");
    }

    if (ok) {
        std::cout << "Matrix Multiplication Results (first 10x10):" << std::endl;
        for (int i = 0; i < 10 && i < N; i++) {
            for (int j = 0; j < 10 && j < N; j++) {
                std::cout << C[i * N + j] << " ";
            }
            std::cout << std::endl;
        }
    }

    // Release resources on both the success and the failure path.
    cudaFree(dev_A);
    cudaFree(dev_B);
    cudaFree(dev_C);
    delete[] A;
    delete[] B;
    delete[] C;
    return ok ? 0 : 1;
}
Writing mul.cu
!nvcc mul.cu -o mul -arch=sm_75
!./mul
Enter the size of the square matrices (N): 3
Enter elements for matrix A:
1 1 1
1 1 1
1 1 1
Enter elements for matrix B:
2
2 2 2
2 2 2
2 2 2
Matrix Multiplication Results (first 10x10):
6 6 6
6 6 6
6 6 6
Start coding or generate with AI.