Thanks for visiting codestin.com
Credit goes to code.neomutt.org

NeoMutt  2025-12-11-219-g274730
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
mbyte.c
Go to the documentation of this file.
1
24
30
31#include "config.h"
32#include <limits.h>
33#include <stdbool.h>
34#include <string.h>
35#include <wchar.h>
36#include <wctype.h>
37#include "mbyte.h"
38#include "buffer.h"
39#include "charset.h"
40#include "ctype2.h"
41#include "memory.h"
42#include "pool.h"
43#include "string2.h"
44
46
56int mutt_mb_charlen(const char *s, int *width)
57{
58 if (!s || (*s == '\0'))
59 return 0;
60
61 wchar_t wc = 0;
62 mbstate_t mbstate = { 0 };
63
64 size_t n = mutt_str_len(s);
65 size_t k = mbrtowc(&wc, s, n, &mbstate);
66 if (width)
67 *width = wcwidth(wc);
68 return ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)) ? -1 : k;
69}
70
/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer, in bytes
 * @retval true  Success, buf holds the NUL-terminated initials
 * @retval false Error: NULL argument, zero-sized buffer, invalid multibyte
 *               character, or the initials don't fit in the buffer
 *
 * The first (multibyte) character of every word is copied to buf.
 * Words are separated by whitespace or hyphens and single-byte punctuation
 * at the start of a word is ignored,
 * e.g. "John F. Kennedy" is reduced to "JFK".
 *
 * @note On failure, buf may hold a partial result that is not NUL-terminated
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-sized buffer can't even hold the terminating NUL */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word */
    if ((clen == 1) && mutt_ispunct(*name))
    {
      name++;
      continue;
    }

    /* Always keep one byte in reserve for the terminating NUL */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= clen;
    while (clen--)
      *buf++ = *name++;

    /* Skip to end-of-word */
    for (; *name; name += clen)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (mutt_isspace(*name) || (*name == '-')))
        break;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (mutt_isspace(*name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
128
138int mutt_mb_width(const char *str, int col, bool indent)
139{
140 if (!str || !*str)
141 return 0;
142
143 bool nl = false;
144 int total_width = 0;
145 mbstate_t mbstate = { 0 };
146
147 size_t str_len = mutt_str_len(str);
148
149 while (*str && (str_len > 0))
150 {
151 wchar_t wc = L'\0';
152 size_t consumed = mbrtowc(&wc, str, str_len, &mbstate);
153 if (consumed == 0)
154 break;
155
156 if (consumed == ICONV_ILLEGAL_SEQ)
157 {
158 memset(&mbstate, 0, sizeof(mbstate));
159 wc = ReplacementChar;
160 consumed = 1;
161 }
162 else if (consumed == ICONV_BUF_TOO_SMALL)
163 {
164 wc = ReplacementChar;
165 consumed = str_len;
166 }
167
168 int wchar_width = wcwidth(wc);
169 if (wchar_width < 0)
170 wchar_width = 1;
171
172 if ((wc == L'\t') || (nl && (wc == L' ')))
173 {
174 /* correctly calc tab stop, even for sending as the line should look
175 * pretty on the receiving end */
176 nl = false;
177 wchar_width = 8 - (col % 8);
178 }
179 else if (indent && (wc == '\n'))
180 {
181 /* track newlines for display-case: if we have a space after a newline,
182 * assume 8 spaces as for display we always tab-fold */
183 nl = true;
184 }
185
186 total_width += wchar_width;
187 str += consumed;
188 str_len -= consumed;
189 }
190
191 return total_width;
192}
193
/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * Characters that pass IsWPrint() and have a positive wcwidth() are
 * reported with that width.  Everything else gets a fixed width that
 * depends only on the magnitude of the character's value.
 *
 * NOTE(review): the fixed widths 2, 6 and 10 presumably match the length of
 * an escaped rendering used by the display code -- confirm with the caller.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int n = wcwidth(wc);
  if (IsWPrint(wc) && (n > 0))
    return n;

  /* No bits set above the low 7 */
  if (!(wc & ~0x7f))
    return 2;

  /* No bits set above the low 16 */
  if (!(wc & ~0xffff))
    return 6;

  return 10;
}
210
/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Number of wide characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first n characters
 * @retval 0   s is NULL
 *
 * @note Exactly n characters are read; the scan does not stop at L'\0'
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  if (!s)
    return 0;

  int w = 0;
  for (size_t idx = 0; idx < n; idx++)
    w += mutt_mb_wcwidth(s[idx]);

  return w;
}
227
/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String being displayed
 * @param n  Length of string in characters
 * @param w1 Width limit, in screen columns
 * @retval num Number of leading characters whose combined
 *             mutt_mb_wcwidth() does not exceed w1 (at most n)
 * @retval 0   s is NULL
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  const wchar_t *s0 = s;
  int w = 0;
  for (; n; s++, n--)
  {
    /* Stop on the character that pushes the running width past the limit */
    w += mutt_mb_wcwidth(*s);
    if (w > w1)
      break;
  }

  return s - s0;
}
250
257void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
258{
259 if (!dest || !wstr)
260 return;
261
262 // Give ourselves 4 utf-8 bytes per wide character
263 buf_alloc(dest, 4 * wlen);
264
265 mbstate_t mbstate = { 0 };
266 size_t k = 0;
267
268 char *buf = dest->data;
269 size_t buflen = dest->dsize;
270
271 for (; (wlen > 0) && (buflen >= MB_LEN_MAX); buf += k, buflen -= k, wstr++, wlen--)
272 {
273 k = wcrtomb(buf, *wstr, &mbstate);
274 if (k == ICONV_ILLEGAL_SEQ)
275 break;
276 if (*wstr == L'\0')
277 break;
278 }
279
280 *buf = '\0';
281 buf_fix_dptr(dest);
282}
283
292size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
293{
294 if (!pwbuf || !pwbuflen || !buf)
295 return 0;
296
297 wchar_t wc = 0;
298 mbstate_t mbstate = { 0 };
299 size_t k;
300 wchar_t *wbuf = *pwbuf;
301 size_t wbuflen = *pwbuflen;
302
303 while (*buf != '\0')
304 {
305 memset(&mbstate, 0, sizeof(mbstate));
306 for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
307 (k != ICONV_ILLEGAL_SEQ) && (k != ICONV_BUF_TOO_SMALL);
308 buf += k)
309 {
310 if (i >= wbuflen)
311 {
312 wbuflen = i + 20;
313 MUTT_MEM_REALLOC(&wbuf, wbuflen, wchar_t);
314 }
315 wbuf[i++] = wc;
316 }
317 if ((*buf != '\0') && ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)))
318 {
319 if (i >= wbuflen)
320 {
321 wbuflen = i + 20;
322 MUTT_MEM_REALLOC(&wbuf, wbuflen, wchar_t);
323 }
324 wbuf[i++] = ReplacementChar;
325 buf++;
326 }
327 }
328 *pwbuf = wbuf;
329 *pwbuflen = wbuflen;
330 return i;
331}
332
/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of the listed shell characters
 * @retval false Character is anything else, including L'\0'
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  static const wchar_t shell_chars[] = L"<>&()$?*;{}| "; /* ! not included because it can be part of a pathname in NeoMutt */

  /* wcschr() treats the terminating L'\0' as part of the string,
   * so it has to be excluded explicitly */
  if (ch == L'\0')
    return false;

  return wcschr(shell_chars, ch) != NULL;
}
346
355bool mutt_mb_is_lower(const char *s)
356{
357 if (!s)
358 return false;
359
360 wchar_t wc = 0;
361 mbstate_t mbstate = { 0 };
362 size_t l;
363
364 memset(&mbstate, 0, sizeof(mbstate));
365 size_t n = mutt_str_len(s);
366
367 for (; (n > 0) && (*s != '\0') && (l = mbrtowc(&wc, s, n, &mbstate)) != 0; s += l, n -= l)
368 {
369 if ((l == ICONV_BUF_TOO_SMALL) || (l == ICONV_ILLEGAL_SEQ))
370 return false; // error; assume upper-case
371 if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
372 return false; // upper-case
373 }
374
375 return true; // lower-case
376}
377
/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is one of the invisible formatting or
 *               bidirectional control characters listed below
 * @retval false Any other character
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  switch (wc)
  {
    case 0x00ad: /* soft hyphen */
    case 0x061c: /* arabic letter mark */
    case 0x200e: /* left-to-right mark */
    case 0x200f: /* right-to-left mark */
    case 0xfeff: /* zero width no-break space */
      return true;
    default:
      break;
  }

  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
   * left-to-right override, right-to-left override */
  if ((wc >= 0x202a) && (wc <= 0x202e))
    return true;

  /* left-to-right isolate, right-to-left isolate, first strong isolate,
   * pop directional isolate */
  if ((wc >= 0x2066) && (wc <= 0x2069))
    return true;

  return false;
}
412
425{
426 if (!s || !*s)
427 return -1;
428
429 wchar_t wc = 0;
430 size_t k, k2;
431 char scratch[MB_LEN_MAX + 1];
432 char *p = *s;
433 mbstate_t mbstate1 = { 0 };
434 mbstate_t mbstate2 = { 0 };
435
436 struct Buffer *buf = buf_pool_get();
437 for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
438 {
439 if ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL))
440 {
441 k = 1;
442 memset(&mbstate1, 0, sizeof(mbstate1));
443 wc = ReplacementChar;
444 }
445 if (CharsetIsUtf8 && IsBOM(wc))
446 {
447 continue;
448 }
449 if (!IsWPrint(wc))
450 wc = '?';
452 continue;
453 k2 = wcrtomb(scratch, wc, &mbstate2);
454 scratch[k2] = '\0';
455 buf_addstr(buf, scratch);
456 }
457 FREE(s);
458
459 if (buf_is_empty(buf))
460 *s = MUTT_MEM_CALLOC(1, char); // Fake empty string
461 else
462 *s = buf_strdup(buf);
463
464 buf_pool_release(&buf);
465 return 0;
466}
bool buf_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition buffer.c:291
void buf_fix_dptr(struct Buffer *buf)
Move the dptr to end of the Buffer.
Definition buffer.c:182
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition buffer.c:226
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition buffer.c:571
void buf_alloc(struct Buffer *buf, size_t new_size)
Make sure a buffer can store at least new_size bytes.
Definition buffer.c:337
General purpose object for storing and parsing strings.
ctype(3) wrapper functions
bool mutt_isspace(int arg)
Wrapper for isspace(3)
Definition ctype.c:96
bool mutt_ispunct(int arg)
Wrapper for ispunct(3)
Definition ctype.c:79
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition mbyte.c:56
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition mbyte.c:341
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition mbyte.c:424
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition mbyte.c:238
bool OptLocales
(pseudo) set if user has valid locale definition
Definition mbyte.c:45
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition mbyte.c:83
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition mbyte.c:386
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
Convert a string from multibyte to wide characters.
Definition mbyte.c:292
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition mbyte.c:217
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition mbyte.c:355
int mutt_mb_width(const char *str, int col, bool indent)
Measure a string's display width (in screen columns)
Definition mbyte.c:138
void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
Convert a string from wide to multibyte characters.
Definition mbyte.c:257
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition mbyte.c:199
Multi-byte String manipulation functions.
#define IsBOM(wc)
Definition mbyte.h:43
#define IsWPrint(wc)
Definition mbyte.h:41
Memory management wrappers.
#define FREE(x)
Free memory and set the pointer to NULL.
Definition memory.h:68
#define MUTT_MEM_CALLOC(n, type)
Definition memory.h:52
#define MUTT_MEM_REALLOC(pptr, n, type)
Definition memory.h:55
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition charset.c:66
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition charset.c:61
Conversion between different character encodings.
#define ICONV_BUF_TOO_SMALL
Error value for iconv() - Buffer too small.
Definition charset.h:116
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition charset.h:114
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition string.c:500
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition pool.c:91
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition pool.c:111
A global pool of Buffers.
String manipulation functions.
String manipulation buffer.
Definition buffer.h:36
size_t dsize
Length of data.
Definition buffer.h:39
char * data
Pointer to data.
Definition buffer.h:37