Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Bad C++ code in benchmark #2231

Open
Open
@oleid

Description

@oleid

With great interest I read the following blog post:

https://lpython.org/blog/2023/07/lpython-novel-fast-retargetable-python-compiler/

But to my surprise I discovered the C++ code in the benchmark of dijkstra_shortest_path is pessimized rather than optimized.

The use of maps makes this code much, much slower than it needs to be.
On my system (some ancient i7 laptop), the following is more than 40 times faster (see below) than the original code.

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <vector>

/// @brief Sums the shortest-path distances from `source` to every vertex.
///
/// Builds a complete graph on `n` vertices in which the edge between i and j
/// weighs |i - j| (stored as a dense row-major n*n matrix) and runs the
/// array-based Dijkstra algorithm on it.
///
/// @param n       Number of vertices. A non-positive value yields 0.
/// @param source  Start vertex; must satisfy 0 <= source < n.
/// @return Sum of the shortest distances from `source` to all vertices. The sum
///         is accumulated in int32_t, so it is only meaningful while it fits.
/// @throws std::out_of_range if `source` is not a valid vertex index.
int32_t dijkstra_shortest_path(int32_t n, int32_t source) {
    if (n <= 0) {
        return 0;  // no vertices, nothing to sum
    }
    if (source < 0 || source >= n) {
        throw std::out_of_range("dijkstra_shortest_path: source out of range");
    }

    constexpr int32_t kInfinity = 2147483647;
    // Index arithmetic is done in size_t so that n * n cannot overflow int32_t.
    const auto count = static_cast<std::size_t>(n);

    std::vector<int32_t> graph(count * count);
    for (int32_t i = 0; i < n; i++) {
        const std::size_t row = static_cast<std::size_t>(i) * count;
        for (int32_t j = 0; j < n; j++) {
            graph[row + static_cast<std::size_t>(j)] = std::abs(i - j);
        }
    }

    std::vector<int32_t> dist(count, kInfinity);
    // Predecessor of each vertex on its shortest path; recorded as part of the
    // algorithm even though the result below only uses the distances.
    std::vector<int32_t> prev(count, -1);
    std::vector<bool> visited(count, false);

    // Q holds the vertices that have not been finalized yet.
    std::vector<int32_t> Q;
    Q.reserve(count);
    for (int32_t v = 0; v < n; v++) {
        Q.push_back(v);
    }
    dist[static_cast<std::size_t>(source)] = 0;

    while (!Q.empty()) {
        // Linear scan for the pending vertex with the smallest distance.
        int32_t u = -1;
        std::size_t uidx = 0;
        int32_t mindist = kInfinity;
        for (std::size_t k = 0; k < Q.size(); k++) {
            const int32_t candidate = Q[k];
            if (mindist > dist[candidate]) {
                mindist = dist[candidate];
                u = candidate;
                uidx = k;
            }
        }
        if (u < 0) {
            // Every pending vertex is unreachable. This cannot occur on the
            // complete graph built above; the guard keeps -1 from ever being
            // used as an index should the graph construction change.
            break;
        }
        Q.erase(Q.begin() + static_cast<std::vector<int32_t>::difference_type>(uidx));
        visited[u] = true;

        // Relax every edge leaving u towards a vertex that is still pending.
        const std::size_t row = static_cast<std::size_t>(u) * count;
        for (int32_t v = 0; v < n; v++) {
            if (v != u && !visited[v]) {
                const int32_t alt = dist[u] + graph[row + static_cast<std::size_t>(v)];
                if (alt < dist[v]) {
                    dist[v] = alt;
                    prev[v] = u;
                }
            }
        }
    }

    int32_t dist_sum = 0;
    for (const int32_t d : dist) {
        dist_sum += d;
    }
    return dist_sum;
}


// Benchmark driver: runs the shortest-path computation on a 4000-vertex graph
// starting from vertex 0 and prints the resulting distance sum.
int main() {
    constexpr int32_t kVertexCount = 4000;
    constexpr int32_t kSourceVertex = 0;
    std::cout << dijkstra_shortest_path(kVertexCount, kSourceVertex) << std::endl;
    return 0;
}

For convenience, this is the diff:

--- a.cpp	2023-07-30 10:59:17.482487750 +0200
+++ f.cpp	2023-07-30 14:57:58.543699027 +0200
@@ -1,13 +1,15 @@
 #include <iostream>
-#include <unordered_map>
 #include <vector>
 
 int32_t dijkstra_shortest_path(int32_t n, int32_t source) {
     int32_t i, j, v, u, mindist, alt, dummy, uidx;
-    std::unordered_map<int32_t, int32_t> graph, dist, prev;
-    std::unordered_map<int32_t, bool> visited;
-    std::vector<int32_t> Q;
-
+    std::vector<int32_t>  dist, prev;
+    std::vector<bool> visited;
+    std::vector<int32_t> Q, graph(n*n);
+    Q.reserve(n);
+    dist.reserve(n);
+    prev.reserve(n);
+    
     for(i = 0; i < n; i++) {
         for(j = 0; j < n; j++) {
             graph[n * i + j] = std::abs(i - j);
@@ -15,10 +17,10 @@
     }
 
     for(v = 0; v < n; v++) {
-        dist[v] = 2147483647;
-        prev[v] = -1;
+        dist.push_back(2147483647);
+        prev.push_back(-1);
         Q.push_back(v);
-        visited[v] = false;
+        visited.push_back(false);
     }
     dist[source] = 0;

Furthermore, I implemented a Rust version for comparison.

/// Sums the shortest-path distances from `source` to every vertex.
///
/// Builds a complete graph on `n` vertices in which the edge between `i` and
/// `j` weighs `|i - j|` (stored as a dense row-major `n * n` matrix) and runs
/// the array-based Dijkstra algorithm on it.
///
/// Returns 0 when `n <= 0`.
///
/// # Panics
///
/// Panics if `source` is not in `0..n`.
fn dijkstra_shortest_path(n: i32, source: i32) -> i32 {
    if n <= 0 {
        return 0; // no vertices, nothing to sum
    }
    assert!(
        source >= 0 && source < n,
        "source {} is not a vertex in 0..{}",
        source,
        n
    );

    const INFINITY: i32 = 2147483647;
    // Index arithmetic is done in usize so that n * n cannot overflow i32.
    let count = n as usize;

    let mut graph = vec![0i32; count * count];
    for i in 0..n {
        let row = (i as usize) * count;
        for j in 0..n {
            graph[row + j as usize] = (i - j).abs();
        }
    }

    let mut dist = vec![INFINITY; count];
    // Predecessor of each vertex on its shortest path; recorded as part of the
    // algorithm even though the result below only uses the distances.
    let mut prev = vec![-1i32; count];
    let mut visited = vec![false; count];
    // Pending (not yet finalized) vertices; this never holds more than n entries.
    let mut q: Vec<usize> = (0..count).collect();
    dist[source as usize] = 0;

    while !q.is_empty() {
        // Linear scan for the pending vertex with the smallest distance.
        let mut best: Option<(usize, usize)> = None; // (position in q, vertex)
        let mut mindist = INFINITY;
        for (idx, &vertex) in q.iter().enumerate() {
            if mindist > dist[vertex] {
                mindist = dist[vertex];
                best = Some((idx, vertex));
            }
        }
        let (uidx, u) = match best {
            Some(found) => found,
            // Every pending vertex is unreachable. This cannot occur on the
            // complete graph built above; the guard replaces indexing with a
            // vertex id of -1.
            None => break,
        };
        q.remove(uidx);
        visited[u] = true;

        // Relax every edge leaving u towards a vertex that is still pending.
        for v in 0..count {
            if v != u && !visited[v] {
                let alt = dist[u] + graph[count * u + v];
                if alt < dist[v] {
                    dist[v] = alt;
                    prev[v] = u as i32;
                }
            }
        }
    }

    dist.iter().sum()
}

/// Benchmark driver: runs the shortest-path computation on a 4000-vertex graph
/// starting from vertex 0 and prints the resulting distance sum.
fn main() {
    let n = 4000;
    let dist_sum = dijkstra_shortest_path(n, 0);
    // println! already appends a newline; the extra "\n" printed a blank line
    // that the C++ counterpart does not emit.
    println!("{}", dist_sum);
}

Benchmark

$ clang++ -std=c++20 -ffast-math -O3 -march=native old.cpp   -o old
$ clang++ -std=c++20 -ffast-math -O3 -march=native better.cpp   -o better
$ rustc main.rs -C opt-level=3  -o better-rust
$ hyperfine --warmup=20 --min-runs=10  ./old ./better ./better-rust 
Benchmark 1: ./old
  Time (mean ± σ):      1.690 s ±  0.112 s    [User: 1.380 s, System: 0.299 s]
  Range (min … max):    1.407 s …  1.790 s    10 runs
 
Benchmark 2: ./better
  Time (mean ± σ):      40.3 ms ±   2.7 ms    [User: 31.5 ms, System: 8.2 ms]
  Range (min … max):    34.4 ms …  45.8 ms    71 runs
 
Benchmark 3: ./better-rust
  Time (mean ± σ):      40.4 ms ±   1.7 ms    [User: 32.8 ms, System: 7.5 ms]
  Range (min … max):    37.7 ms …  44.2 ms    68 runs
 
Summary
  ./better ran
    1.00 ± 0.08 times faster than ./better-rust
   41.93 ± 3.97 times faster than ./old

(compilers used: rustc 1.71.0-nightly (2f2c438dc 2023-05-08) and clang version 15.0.7 on ArchLinux)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions