Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 0dff615

Browse files
ilevkivskyi authored and michaelm-openai committed
Fix librt compilation on platforms with OpenMP (python#20583)
This file was missed when base64 support was originally vendored.
1 parent 0ef07c2 commit 0dff615

2 files changed

Lines changed: 150 additions & 0 deletions

File tree

mypyc/build.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ class ModDesc(NamedTuple):
9797
"base64/arch/neon64/enc_loop_asm.c",
9898
"base64/codecs.h",
9999
"base64/env.h",
100+
"base64/lib_openmp.c",
100101
"base64/tables/tables.h",
101102
"base64/tables/table_dec_32bit.h",
102103
"base64/tables/table_enc_12bit.h",

mypyc/lib-rt/base64/lib_openmp.c

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// This code makes some assumptions on the implementation of
2+
// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
3+
// Basically these assumptions boil down to that when breaking the src into
4+
// parts, out parts can be written without side effects.
5+
// This is met when:
6+
// 1) base64_stream_encode() and base64_stream_decode() don't use globals;
7+
// 2) the shared variables src and out are not read or written outside of the
8+
// bounds of their parts, i.e. when base64_stream_encode() reads a multiple
9+
// of 3 bytes, it must write no more than a multiple of 4 bytes, not even
10+
// temporarily;
11+
// 3) the state flag can be discarded after base64_stream_encode() and
12+
// base64_stream_decode() on the parts.
13+
14+
static inline void
15+
base64_encode_openmp
16+
( const char *src
17+
, size_t srclen
18+
, char *out
19+
, size_t *outlen
20+
, int flags
21+
)
22+
{
23+
size_t s;
24+
size_t t;
25+
size_t sum = 0, len, last_len;
26+
struct base64_state state, initial_state;
27+
int num_threads, i;
28+
29+
// Request a number of threads but not necessarily get them:
30+
#pragma omp parallel
31+
{
32+
// Get the number of threads used from one thread only,
33+
// as num_threads is a shared var:
34+
#pragma omp single
35+
{
36+
num_threads = omp_get_num_threads();
37+
38+
// Split the input string into num_threads parts, each
39+
// part a multiple of 3 bytes. The remaining bytes will
40+
// be done later:
41+
len = srclen / (num_threads * 3);
42+
len *= 3;
43+
last_len = srclen - num_threads * len;
44+
45+
// Init the stream reader:
46+
base64_stream_encode_init(&state, flags);
47+
initial_state = state;
48+
}
49+
50+
// Single has an implicit barrier for all threads to wait here
51+
// for the above to complete:
52+
#pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
53+
for (i = 0; i < num_threads; i++)
54+
{
55+
// Feed each part of the string to the stream reader:
56+
base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
57+
sum += s;
58+
}
59+
}
60+
61+
// As encoding should never fail and we encode an exact multiple
62+
// of 3 bytes, we can discard state:
63+
state = initial_state;
64+
65+
// Encode the remaining bytes:
66+
base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);
67+
68+
// Finalize the stream by writing trailer if any:
69+
base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);
70+
71+
// Final output length is stream length plus tail:
72+
sum += s + t;
73+
*outlen = sum;
74+
}
75+
76+
static inline int
77+
base64_decode_openmp
78+
( const char *src
79+
, size_t srclen
80+
, char *out
81+
, size_t *outlen
82+
, int flags
83+
)
84+
{
85+
int num_threads, result = 0, i;
86+
size_t sum = 0, len, last_len, s;
87+
struct base64_state state, initial_state;
88+
89+
// Request a number of threads but not necessarily get them:
90+
#pragma omp parallel
91+
{
92+
// Get the number of threads used from one thread only,
93+
// as num_threads is a shared var:
94+
#pragma omp single
95+
{
96+
num_threads = omp_get_num_threads();
97+
98+
// Split the input string into num_threads parts, each
99+
// part a multiple of 4 bytes. The remaining bytes will
100+
// be done later:
101+
len = srclen / (num_threads * 4);
102+
len *= 4;
103+
last_len = srclen - num_threads * len;
104+
105+
// Init the stream reader:
106+
base64_stream_decode_init(&state, flags);
107+
108+
initial_state = state;
109+
}
110+
111+
// Single has an implicit barrier to wait here for the above to
112+
// complete:
113+
#pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
114+
for (i = 0; i < num_threads; i++)
115+
{
116+
int this_result;
117+
118+
// Feed each part of the string to the stream reader:
119+
this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
120+
sum += s;
121+
result += this_result;
122+
}
123+
}
124+
125+
// If `result' equals `-num_threads', then all threads returned -1,
126+
// indicating that the requested codec is not available:
127+
if (result == -num_threads) {
128+
return -1;
129+
}
130+
131+
// If `result' does not equal `num_threads', then at least one of the
132+
// threads hit a decode error:
133+
if (result != num_threads) {
134+
return 0;
135+
}
136+
137+
// So far so good, now decode whatever remains in the buffer. Reuse the
138+
// initial state, since we are at a 4-byte boundary:
139+
state = initial_state;
140+
result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
141+
sum += s;
142+
*outlen = sum;
143+
144+
// If when decoding a whole block, we're still waiting for input then fail:
145+
if (result && (state.bytes == 0)) {
146+
return result;
147+
}
148+
return 0;
149+
}

0 commit comments

Comments
 (0)