Thanks for visiting codestin.com
Credit goes to code.neomutt.org

NeoMutt  2025-12-11-219-g274730
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
mbyte.h File Reference

Multi-byte String manipulation functions. More...

#include "config.h"
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <wctype.h>
+ Include dependency graph for mbyte.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define IsPrint(ch)
 
#define IsWPrint(wc)
 
#define IsBOM(wc)
 

Functions

int mutt_mb_charlen (const char *s, int *width)
 Count the bytes in a (multibyte) character.
 
int mutt_mb_filter_unprintable (char **s)
 Replace unprintable characters.
 
bool mutt_mb_get_initials (const char *name, char *buf, size_t buflen)
 Turn a name into initials.
 
bool mutt_mb_is_display_corrupting_utf8 (wchar_t wc)
 Will this character corrupt the display?
 
bool mutt_mb_is_lower (const char *s)
 Does a multi-byte string contain only lowercase characters?
 
bool mutt_mb_is_shell_char (wchar_t ch)
 Is character not typically part of a pathname.
 
size_t mutt_mb_mbstowcs (wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
 Convert a string from multibyte to wide characters.
 
void buf_mb_wcstombs (struct Buffer *dest, const wchar_t *wstr, size_t wlen)
 Convert a string from wide to multibyte characters.
 
int mutt_mb_wcswidth (const wchar_t *s, size_t n)
 Measure the screen width of a string.
 
int mutt_mb_wcwidth (wchar_t wc)
 Measure the screen width of a character.
 
int mutt_mb_width (const char *str, int col, bool indent)
 Measure a string's display width (in screen columns)
 
size_t mutt_mb_width_ceiling (const wchar_t *s, size_t n, int w1)
 Keep the end of the string on-screen.
 

Variables

bool OptLocales
 (pseudo) set if user has valid locale definition
 

Detailed Description

Multi-byte String manipulation functions.

Authors
  • Richard Russon

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file mbyte.h.

Macro Definition Documentation

◆ IsPrint

#define IsPrint ( ch)
Value:
(isprint((unsigned char) (ch)) || (OptLocales ? 0 : ((unsigned char) (ch) >= 0xa0)))
bool OptLocales
(pseudo) set if user has valid locale definition
Definition mbyte.c:45

Definition at line 40 of file mbyte.h.

◆ IsWPrint

#define IsWPrint ( wc)
Value:
(iswprint(wc) || (OptLocales ? 0 : (wc >= 0xa0)))

Definition at line 41 of file mbyte.h.

◆ IsBOM

#define IsBOM ( wc)
Value:
(wc == L'\xfeff')

Definition at line 43 of file mbyte.h.

Function Documentation

◆ mutt_mb_charlen()

int mutt_mb_charlen ( const char * s,
int * width )

Count the bytes in a (multibyte) character.

Parameters
[in] s: String to be examined
[out] width: Number of screen columns the character would use
Return values
num: Number of bytes consumed by the first (multibyte) character of the input
<0: Conversion error
=0: End of input
>0: Length (bytes)

Definition at line 56 of file mbyte.c.

57{
58 if (!s || (*s == '\0'))
59 return 0;
60
61 wchar_t wc = 0;
62 mbstate_t mbstate = { 0 };
63
64 size_t n = mutt_str_len(s);
65 size_t k = mbrtowc(&wc, s, n, &mbstate);
66 if (width)
67 *width = wcwidth(wc);
68 return ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)) ? -1 : k;
69}
#define ICONV_BUF_TOO_SMALL
Error value for iconv() - Buffer too small.
Definition charset.h:116
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition charset.h:114
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition string.c:500
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_filter_unprintable()

int mutt_mb_filter_unprintable ( char ** s)

Replace unprintable characters.

Parameters
[in,out] s: String to modify
Return values
0: Success
-1: Error

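Unprintable characters will be replaced with '?'. Invalid multibyte sequences are first mapped to ReplacementChar. When the character set is UTF-8, byte-order marks are dropped.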

Note
The source string will be freed and a newly allocated string will be returned in its place. The caller should free the returned string.

Definition at line 424 of file mbyte.c.

425{
426 if (!s || !*s)
427 return -1;
428
429 wchar_t wc = 0;
430 size_t k, k2;
431 char scratch[MB_LEN_MAX + 1];
432 char *p = *s;
433 mbstate_t mbstate1 = { 0 };
434 mbstate_t mbstate2 = { 0 };
435
436 struct Buffer *buf = buf_pool_get();
437 for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
438 {
439 if ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL))
440 {
441 k = 1;
442 memset(&mbstate1, 0, sizeof(mbstate1));
443 wc = ReplacementChar;
444 }
445 if (CharsetIsUtf8 && IsBOM(wc))
446 {
447 continue;
448 }
449 if (!IsWPrint(wc))
450 wc = '?';
451 else if (CharsetIsUtf8 && mutt_mb_is_display_corrupting_utf8(wc))
452 continue;
453 k2 = wcrtomb(scratch, wc, &mbstate2);
454 scratch[k2] = '\0';
455 buf_addstr(buf, scratch);
456 }
457 FREE(s);
458
459 if (buf_is_empty(buf))
460 *s = MUTT_MEM_CALLOC(1, char); // Fake empty string
461 else
462 *s = buf_strdup(buf);
463
464 buf_pool_release(&buf);
465 return 0;
466}
bool buf_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition buffer.c:291
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition buffer.c:226
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition buffer.c:571
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition mbyte.c:386
#define IsBOM(wc)
Definition mbyte.h:43
#define IsWPrint(wc)
Definition mbyte.h:41
#define FREE(x)
Free memory and set the pointer to NULL.
Definition memory.h:68
#define MUTT_MEM_CALLOC(n, type)
Definition memory.h:52
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition charset.c:66
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition charset.c:61
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition pool.c:91
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition pool.c:111
String manipulation buffer.
Definition buffer.h:36
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_get_initials()

bool mutt_mb_get_initials ( const char * name,
char * buf,
size_t buflen )

Turn a name into initials.

Parameters
name: String to be converted
buf: Buffer for the result
buflen: Size of the buffer
Return values
true: Success
false: Failure

Take a name, e.g. "John F. Kennedy" and reduce it to initials "JFK". The function saves the first character from each word. Words are delimited by whitespace, or hyphens (so "Jean-Pierre" becomes "JP").

Definition at line 83 of file mbyte.c.

84{
85 if (!name || !buf)
86 return false;
87
88 while (*name)
89 {
90 /* Char's length in bytes */
91 int clen = mutt_mb_charlen(name, NULL);
92 if (clen < 1)
93 return false;
94
95 /* Ignore punctuation at the beginning of a word */
96 if ((clen == 1) && mutt_ispunct(*name))
97 {
98 name++;
99 continue;
100 }
101
102 if (clen >= buflen)
103 return false;
104
105 /* Copy one multibyte character */
106 buflen -= clen;
107 while (clen--)
108 *buf++ = *name++;
109
110 /* Skip to end-of-word */
111 for (; *name; name += clen)
112 {
113 clen = mutt_mb_charlen(name, NULL);
114 if (clen < 1)
115 return false;
116 if ((clen == 1) && (mutt_isspace(*name) || (*name == '-')))
117 break;
118 }
119
120 /* Skip any whitespace, or hyphens */
121 while (*name && (mutt_isspace(*name) || (*name == '-')))
122 name++;
123 }
124
125 *buf = '\0';
126 return true;
127}
bool mutt_isspace(int arg)
Wrapper for isspace(3)
Definition ctype.c:96
bool mutt_ispunct(int arg)
Wrapper for ispunct(3)
Definition ctype.c:79
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition mbyte.c:56
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_is_display_corrupting_utf8()

bool mutt_mb_is_display_corrupting_utf8 ( wchar_t wc)

Will this character corrupt the display?

Parameters
wc: Character to examine
Return values
true: Character would corrupt the display
false: Character is safe to display
Note
This list isn't complete.

Definition at line 386 of file mbyte.c.

387{
388 if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
389 (wc == (wchar_t) 0x200e) || /* left-to-right mark */
390 (wc == (wchar_t) 0x200f) || /* right-to-left mark */
391 (wc == (wchar_t) 0xfeff)) /* zero width no-break space */
392 {
393 return true;
394 }
395
396 /* left-to-right isolate, right-to-left isolate, first strong isolate,
397 * pop directional isolate */
398 if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
399 return true;
400
401 /* left-to-right embedding, right-to-left embedding, pop directional formatting,
402 * left-to-right override, right-to-left override */
403 if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
404 return true;
405
406 /* arabic letter mark */
407 if (wc == (wchar_t) 0x061c)
408 return true;
409
410 return false;
411}
+ Here is the caller graph for this function:

◆ mutt_mb_is_lower()

bool mutt_mb_is_lower ( const char * s)

Does a multi-byte string contain only lowercase characters?

Parameters
s: String to check
Return values
true: String contains no uppercase characters
false: Error, or contains some uppercase characters

Non-alphabetic characters are considered lowercase.

Definition at line 355 of file mbyte.c.

356{
357 if (!s)
358 return false;
359
360 wchar_t wc = 0;
361 mbstate_t mbstate = { 0 };
362 size_t l;
363
364 memset(&mbstate, 0, sizeof(mbstate));
365 size_t n = mutt_str_len(s);
366
367 for (; (n > 0) && (*s != '\0') && (l = mbrtowc(&wc, s, n, &mbstate)) != 0; s += l, n -= l)
368 {
369 if ((l == ICONV_BUF_TOO_SMALL) || (l == ICONV_ILLEGAL_SEQ))
370 return false; // error; assume upper-case
371 if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
372 return false; // upper-case
373 }
374
375 return true; // lower-case
376}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_is_shell_char()

bool mutt_mb_is_shell_char ( wchar_t ch)

Is character not typically part of a pathname.

Parameters
ch: Character to examine
Return values
true: Character is not typically part of a pathname
false: Character is typically part of a pathname
Note
The name is very confusing.

Definition at line 341 of file mbyte.c.

342{
343 static const wchar_t shell_chars[] = L"<>&()$?*;{}| "; /* ! not included because it can be part of a pathname in NeoMutt */
344 return wcschr(shell_chars, ch);
345}
+ Here is the caller graph for this function:

◆ mutt_mb_mbstowcs()

size_t mutt_mb_mbstowcs ( wchar_t ** pwbuf,
size_t * pwbuflen,
size_t i,
const char * buf )

Convert a string from multibyte to wide characters.

Parameters
[in,out] pwbuf: Buffer for the result (may be reallocated)
[in,out] pwbuflen: Length of the result buffer
[in] i: Starting index into the result buffer
[in] buf: String to convert
Return values
num: Index of the first character after the result

Definition at line 292 of file mbyte.c.

293{
294 if (!pwbuf || !pwbuflen || !buf)
295 return 0;
296
297 wchar_t wc = 0;
298 mbstate_t mbstate = { 0 };
299 size_t k;
300 wchar_t *wbuf = *pwbuf;
301 size_t wbuflen = *pwbuflen;
302
303 while (*buf != '\0')
304 {
305 memset(&mbstate, 0, sizeof(mbstate));
306 for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
307 (k != ICONV_ILLEGAL_SEQ) && (k != ICONV_BUF_TOO_SMALL);
308 buf += k)
309 {
310 if (i >= wbuflen)
311 {
312 wbuflen = i + 20;
313 MUTT_MEM_REALLOC(&wbuf, wbuflen, wchar_t);
314 }
315 wbuf[i++] = wc;
316 }
317 if ((*buf != '\0') && ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)))
318 {
319 if (i >= wbuflen)
320 {
321 wbuflen = i + 20;
322 MUTT_MEM_REALLOC(&wbuf, wbuflen, wchar_t);
323 }
324 wbuf[i++] = ReplacementChar;
325 buf++;
326 }
327 }
328 *pwbuf = wbuf;
329 *pwbuflen = wbuflen;
330 return i;
331}
#define MUTT_MEM_REALLOC(pptr, n, type)
Definition memory.h:55
+ Here is the caller graph for this function:

◆ buf_mb_wcstombs()

void buf_mb_wcstombs ( struct Buffer * dest,
const wchar_t * wstr,
size_t wlen )

Convert a string from wide to multibyte characters.

Parameters
dest: Buffer for the result
wstr: Source wide string to convert
wlen: Length of the wide string

Definition at line 257 of file mbyte.c.

258{
259 if (!dest || !wstr)
260 return;
261
262 // Give ourselves 4 utf-8 bytes per wide character
263 buf_alloc(dest, 4 * wlen);
264
265 mbstate_t mbstate = { 0 };
266 size_t k = 0;
267
268 char *buf = dest->data;
269 size_t buflen = dest->dsize;
270
271 for (; (wlen > 0) && (buflen >= MB_LEN_MAX); buf += k, buflen -= k, wstr++, wlen--)
272 {
273 k = wcrtomb(buf, *wstr, &mbstate);
274 if (k == ICONV_ILLEGAL_SEQ)
275 break;
276 if (*wstr == L'\0')
277 break;
278 }
279
280 *buf = '\0';
281 buf_fix_dptr(dest);
282}
void buf_fix_dptr(struct Buffer *buf)
Move the dptr to end of the Buffer.
Definition buffer.c:182
void buf_alloc(struct Buffer *buf, size_t new_size)
Make sure a buffer can store at least new_size bytes.
Definition buffer.c:337
size_t dsize
Length of data.
Definition buffer.h:39
char * data
Pointer to data.
Definition buffer.h:37
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_wcswidth()

int mutt_mb_wcswidth ( const wchar_t * s,
size_t n )

Measure the screen width of a string.

Parameters
s: String to measure
n: Length of string in characters
Return values
num: Width in screen columns

Definition at line 217 of file mbyte.c.

218{
219 if (!s)
220 return 0;
221
222 int w = 0;
223 while (n--)
224 w += mutt_mb_wcwidth(*s++);
225 return w;
226}
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition mbyte.c:199
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_wcwidth()

int mutt_mb_wcwidth ( wchar_t wc)

Measure the screen width of a character.

Parameters
wc: Character to examine
Return values
num: Width in screen columns

Definition at line 199 of file mbyte.c.

200{
201 int n = wcwidth(wc);
202 if (IsWPrint(wc) && (n > 0))
203 return n;
204 if (!(wc & ~0x7f))
205 return 2;
206 if (!(wc & ~0xffff))
207 return 6;
208 return 10;
209}
+ Here is the caller graph for this function:

◆ mutt_mb_width()

int mutt_mb_width ( const char * str,
int col,
bool indent )

Measure a string's display width (in screen columns)

Parameters
str: String to measure
col: Display column (used for expanding tabs)
indent: If true, newline-space will be indented 8 chars
Return values
num: String's width in screen columns

This is like wcwidth(), but gets const char* not wchar_t*.

Definition at line 138 of file mbyte.c.

139{
140 if (!str || !*str)
141 return 0;
142
143 bool nl = false;
144 int total_width = 0;
145 mbstate_t mbstate = { 0 };
146
147 size_t str_len = mutt_str_len(str);
148
149 while (*str && (str_len > 0))
150 {
151 wchar_t wc = L'\0';
152 size_t consumed = mbrtowc(&wc, str, str_len, &mbstate);
153 if (consumed == 0)
154 break;
155
156 if (consumed == ICONV_ILLEGAL_SEQ)
157 {
158 memset(&mbstate, 0, sizeof(mbstate));
159 wc = ReplacementChar;
160 consumed = 1;
161 }
162 else if (consumed == ICONV_BUF_TOO_SMALL)
163 {
164 wc = ReplacementChar;
165 consumed = str_len;
166 }
167
168 int wchar_width = wcwidth(wc);
169 if (wchar_width < 0)
170 wchar_width = 1;
171
172 if ((wc == L'\t') || (nl && (wc == L' ')))
173 {
174 /* correctly calc tab stop, even for sending as the line should look
175 * pretty on the receiving end */
176 nl = false;
177 wchar_width = 8 - (col % 8);
178 }
179 else if (indent && (wc == '\n'))
180 {
181 /* track newlines for display-case: if we have a space after a newline,
182 * assume 8 spaces as for display we always tab-fold */
183 nl = true;
184 }
185
186 total_width += wchar_width;
187 str += consumed;
188 str_len -= consumed;
189 }
190
191 return total_width;
192}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_width_ceiling()

size_t mutt_mb_width_ceiling ( const wchar_t * s,
size_t n,
int w1 )

Keep the end of the string on-screen.

Parameters
s: String being displayed
n: Length of string in characters
w1: Width limit
Return values
num: Chars to skip

Given a string and a width, determine how many characters from the beginning of the string should be skipped so that the string fits.

Definition at line 238 of file mbyte.c.

239{
240 if (!s)
241 return 0;
242
243 const wchar_t *s0 = s;
244 int w = 0;
245 for (; n; s++, n--)
246 if ((w += mutt_mb_wcwidth(*s)) > w1)
247 break;
248 return s - s0;
249}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ OptLocales

bool OptLocales
extern

(pseudo) set if user has valid locale definition

Definition at line 45 of file mbyte.c.