forked from halide/Halide
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlesson_04_debugging_2.py
More file actions
79 lines (58 loc) · 2.52 KB
/
lesson_04_debugging_2.py
File metadata and controls
79 lines (58 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/python3
# Halide tutorial lesson 4
# This lesson demonstrates how to follow what Halide is doing at runtime.
# NOTE: the build instructions below appear to be carried over from the C++
# version of this lesson (they compile lesson_04*.cpp) and are kept here for
# reference only.
# This lesson can be built by invoking the command:
# make tutorial_lesson_04_debugging_2
# in a shell with the current directory at the top of the halide source tree.
# Otherwise, see the platform-specific compiler invocations below.
# On linux, you can compile and run it like so:
# g++ lesson_04*.cpp -g -I ../include -L ../bin -lHalide -lpthread -ldl -o lesson_04 -std=c++11
# LD_LIBRARY_PATH=../bin ./lesson_04
# On os x:
# g++ lesson_04*.cpp -g -I ../include -L ../bin -lHalide -o lesson_04 -std=c++11
# DYLD_LIBRARY_PATH=../bin ./lesson_04
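# For reference, the C++ version of this lesson begins with:
#   #include "Halide.h"
#   #include <stdio.h>
#   using namespace Halide;
# The import on the next line plays that role here.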
from halide import *
def main():
    """Trace a serial and a parallel evaluation of a simple gradient.

    Defines two Funcs that both compute ``x + y``, turns on store tracing
    for each, schedules the second one with a parallel loop over ``y``,
    and realizes each of them over an 8x8 region.

    Returns:
        0 once both realizations have completed.
    """
    gradient = Func("gradient")
    x, y = Var("x"), Var("y")

    # We'll define our gradient function as before.
    gradient[x, y] = x + y

    # And tell Halide that we'd like to be notified of all
    # evaluations.
    gradient.trace_stores()

    # Realize the function over an 8x8 region. Because tracing is
    # enabled, this will print out all the times gradient(x, y) gets
    # evaluated.
    print("Evaluating gradient")
    output_realization = gradient.realize(8, 8)
    # Wrap the realization as an Int(32) image. The image is not used
    # further in this lesson; the call is kept so that the conversion
    # is still exercised.
    output = Image(Int(32), output_realization)

    # Now that we can snoop on what Halide is doing, let's try our
    # first scheduling primitive. We'll make a new version of
    # gradient that processes each scanline in parallel.
    parallel_gradient = Func("parallel_gradient")
    parallel_gradient[x, y] = x + y

    # We'll also trace this function.
    parallel_gradient.trace_stores()

    # Things are the same so far. We've defined the algorithm, but
    # haven't said anything about how to schedule it. In general,
    # exploring different scheduling decisions doesn't change the code
    # that describes the algorithm.

    # Now we tell Halide to use a parallel for loop over the y
    # coordinate. On linux we run this using a thread pool and a task
    # queue. On os x we call into grand central dispatch, which does
    # the same thing for us.
    parallel_gradient.parallel(y)

    # This time the traced stores should be printed out of order,
    # because each scanline is potentially being processed in a
    # different thread. The number of threads should adapt to your
    # system, but on linux you can control it manually using the
    # environment variable HL_NUMTHREADS.
    print("\nEvaluating parallel_gradient")
    parallel_gradient.realize(8, 8)

    print("Success!")
    return 0
# Run the lesson only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()