utility/all.php

#include "config.h"

#include <limits.h>

#include <stdbool.h>

#include <string.h>

#include <wchar.h>

#include <wctype.h>

#include "mbyte.h"

#include "buffer.h"

#include "charset.h"

#include "ctype2.h"

#include "memory.h"

#include "pool.h"

#include "string2.h"


bool OptLocales; ///< (pseudo) set if user has valid locale definition


/**
 * mutt_mb_charlen - Count the bytes in a (multibyte) character
 * @param[in]  s     String to be examined
 * @param[out] width If non-NULL, receives the wcwidth() of the decoded character
 * @retval  0 s is NULL or an empty string
 * @retval -1 The leading bytes are an invalid or incomplete multibyte sequence
 * @retval >0 Number of bytes that make up the first character of s
 *
 * @note width is written whenever it is non-NULL and s is non-empty, even if
 *       the conversion fails.
 */
int mutt_mb_charlen(const char *s, int *width)
{
  if (!s || (s[0] == '\0'))
    return 0;

  mbstate_t state = { 0 };
  wchar_t ch = 0;

  const size_t rc = mbrtowc(&ch, s, mutt_str_len(s), &state);

  if (width)
    *width = wcwidth(ch);

  if ((rc == ICONV_ILLEGAL_SEQ) || (rc == ICONV_BUF_TOO_SMALL))
    return -1;

  return rc;
}


/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer, in bytes
 * @retval true  Success, buf holds a NUL-terminated string of initials
 * @retval false Error: NULL argument, zero-length buffer, invalid multibyte
 *               sequence, or the initials don't fit in buf
 *
 * The first (multibyte) character of every word is copied to buf, e.g.
 * "John F. Kennedy" becomes "JFK".  Words are separated by whitespace or
 * hyphens and punctuation in front of a word is ignored.
 *
 * @note On failure buf may contain a partial result that is NOT NUL-terminated.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-length buffer can't even hold the terminating NUL */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word */
    if ((clen == 1) && mutt_ispunct(*name))
    {
      name++;
      continue;
    }

    /* Always keep one byte spare for the terminating NUL */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= (size_t) clen;
    for (int i = 0; i < clen; i++)
      *buf++ = *name++;

    /* Skip to end-of-word */
    while (*name)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (mutt_isspace(*name) || (*name == '-')))
        break;
      name += clen;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (mutt_isspace(*name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}


/**
 * mutt_mb_width - Measure a string's display width (in screen columns)
 * @param str    String to measure
 * @param col    Column used to size tabs (see note)
 * @param indent If true, a space directly following a newline is sized like a tab
 * @retval num Total width of the string, 0 if str is NULL or empty
 *
 * Invalid or incomplete multibyte sequences are measured as ReplacementChar.
 * Characters for which wcwidth() reports a negative width count as one column.
 *
 * @note col is not advanced while the string is measured, so every tab is
 *       sized as `8 - (col % 8)` relative to the column passed in.
 * @note Once a newline has been seen (with indent set), the flag stays set
 *       until the next space or tab is encountered.
 */
int mutt_mb_width(const char *str, int col, bool indent)
{
  if (!str || (str[0] == '\0'))
    return 0;

  mbstate_t state = { 0 };
  size_t remaining = mutt_str_len(str);
  bool after_newline = false;
  int width = 0;

  while ((*str != '\0') && (remaining > 0))
  {
    wchar_t wc = L'\0';
    size_t used = mbrtowc(&wc, str, remaining, &state);
    if (used == 0)
      break;

    if (used == ICONV_ILLEGAL_SEQ)
    {
      /* Bad byte: restart the decoder and step over a single byte */
      memset(&state, 0, sizeof(state));
      wc = ReplacementChar;
      used = 1;
    }
    else if (used == ICONV_BUF_TOO_SMALL)
    {
      /* Truncated character: it swallows the rest of the string */
      wc = ReplacementChar;
      used = remaining;
    }

    int cells;
    if ((wc == L'\t') || (after_newline && (wc == L' ')))
    {
      /* correctly calc tab stop, even for sending as the line should look
       * pretty on the receiving end */
      after_newline = false;
      cells = 8 - (col % 8);
    }
    else
    {
      /* track newlines for display-case: if we have a space after a newline,
       * assume 8 spaces as for display we always tab-fold */
      if (indent && (wc == '\n'))
        after_newline = true;

      cells = wcwidth(wc);
      if (cells < 0)
        cells = 1;
    }

    width += cells;
    str += used;
    remaining -= used;
  }

  return width;
}


/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * A character that satisfies IsWPrint() and has a positive wcwidth() is
 * reported with that width.  Anything else gets a fixed width chosen by the
 * magnitude of its code: 2 for values up to 0x7f, 6 for values up to 0xffff
 * and 10 for everything above.
 *
 * NOTE(review): 2/6/10 presumably match the lengths of escaped forms such as
 * "^X", "\uXXXX" and "\UXXXXXXXX" -- confirm against the code that renders them.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int native = wcwidth(wc);
  if (IsWPrint(wc) && (native > 0))
    return native;

  if ((wc & ~0x7f) == 0)
    return 2;

  if ((wc & ~0xffff) == 0)
    return 6;

  return 10;
}


/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Number of wide characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first n characters, 0 if s is NULL
 *
 * @note Exactly n characters are examined; the scan does not stop at L'\0'.
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  if (!s)
    return 0;

  int total = 0;
  for (size_t i = 0; i < n; i++)
    total += mutt_mb_wcwidth(s[i]);

  return total;
}


/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String being displayed
 * @param n  Length of string in characters
 * @param w1 Width limit, in screen columns
 * @retval num Number of leading characters whose combined mutt_mb_wcwidth()
 *             does not exceed w1 (at most n), 0 if s is NULL
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int width = 0;

  while (count < n)
  {
    width += mutt_mb_wcwidth(s[count]);
    if (width > w1)
      break; /* this character would cross the limit */
    count++;
  }

  return count;
}


/**
 * buf_mb_wcstombs - Convert a string from wide to multibyte characters
 * @param dest Buffer for the result
 * @param wstr Source wide string to convert
 * @param wlen Maximum number of wide characters to convert
 *
 * Conversion stops after wlen characters, at the first L'\0' in wstr, or at
 * the first character that wcrtomb() cannot convert.  The result is written
 * from the start of dest->data and is always NUL-terminated.
 *
 * Does nothing if dest or wstr is NULL.
 */
void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
{
  if (!dest || !wstr)
    return;

  /* Reserve the worst case of MB_LEN_MAX bytes per wide character, plus one
   * byte for the terminating NUL.  The loop below insists on MB_LEN_MAX free
   * bytes before each conversion, so reserving any less could truncate the
   * result. */
  buf_alloc(dest, (MB_LEN_MAX * wlen) + 1);

  mbstate_t mbstate = { 0 };
  size_t k = 0;

  char *buf = dest->data;
  size_t buflen = dest->dsize;

  for (; (wlen > 0) && (buflen >= MB_LEN_MAX); buf += k, buflen -= k, wstr++, wlen--)
  {
    k = wcrtomb(buf, *wstr, &mbstate);
    if (k == ICONV_ILLEGAL_SEQ)
      break; /* unconvertible character: keep what we have so far */
    if (*wstr == L'\0')
      break; /* the terminator has been converted; buf still points at it */
  }

  *buf = '\0';
  buf_fix_dptr(dest);
}


/**
 * mutt_mb_mbstowcs - Convert a string from multibyte to wide characters
 * @param[in,out] pwbuf    Buffer for the result (may be reallocated)
 * @param[in,out] pwbuflen Capacity of the buffer, in wide characters
 * @param[in]     i        Index in the buffer at which to start storing
 * @param[in]     buf      Multibyte string to convert
 * @retval num Index just past the last wide character stored
 * @retval 0   pwbuf, pwbuflen or buf is NULL
 *
 * Whenever the buffer is full it is grown to hold `i + 20` wide characters.
 * A byte that starts an invalid or incomplete sequence is stored as
 * ReplacementChar and decoding restarts at the following byte.
 *
 * @note No terminating L'\0' is written by this function.
 */
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
{
  if (!pwbuf || !pwbuflen || !buf)
    return 0;

  wchar_t *out = *pwbuf;
  size_t capacity = *pwbuflen;
  mbstate_t state = { 0 };
  wchar_t wc = 0;

  while (*buf != '\0')
  {
    bool bad_sequence = false;

    /* Every run starts with a clean conversion state */
    memset(&state, 0, sizeof(state));

    /* Decode for as long as the input is well-formed */
    while (true)
    {
      const size_t len = mbrtowc(&wc, buf, MB_LEN_MAX, &state);
      if (len == 0)
        break;

      if ((len == ICONV_ILLEGAL_SEQ) || (len == ICONV_BUF_TOO_SMALL))
      {
        bad_sequence = true;
        break;
      }

      if (i >= capacity)
      {
        capacity = i + 20;
        MUTT_MEM_REALLOC(&out, capacity, wchar_t);
      }
      out[i++] = wc;
      buf += len;
    }

    /* Swap the offending byte for a replacement character and move on */
    if (bad_sequence && (*buf != '\0'))
    {
      if (i >= capacity)
      {
        capacity = i + 20;
        MUTT_MEM_REALLOC(&out, capacity, wchar_t);
      }
      out[i++] = ReplacementChar;
      buf++;
    }
  }

  *pwbuf = out;
  *pwbuflen = capacity;
  return i;
}


/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of `<>&()$?*;{}|` or a space
 * @retval false Any other character, including L'\0'
 *
 * @note '!' is deliberately not in the list because it can be part of a
 *       pathname in NeoMutt.
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  static const wchar_t shell_chars[] = L"<>&()$?*;{}| "; /* ! not included because it can be part of a pathname in NeoMutt */

  /* wcschr() also matches the string's terminator, so L'\0' has to be
   * excluded explicitly */
  if (ch == L'\0')
    return false;

  return wcschr(shell_chars, ch) != NULL;
}


/**
 * mutt_mb_is_lower - Does a multi-byte string contain only lowercase characters?
 * @param s String to check
 * @retval true  No upper-case letter was found (this includes the empty string)
 * @retval false s is NULL, contains an upper-case letter, or contains an
 *               invalid or incomplete multibyte sequence
 *
 * Non-alphabetic characters are ignored.
 */
bool mutt_mb_is_lower(const char *s)
{
  if (!s)
    return false;

  mbstate_t state = { 0 };
  wchar_t wc = 0;
  size_t remaining = mutt_str_len(s);

  while ((remaining > 0) && (*s != '\0'))
  {
    const size_t len = mbrtowc(&wc, s, remaining, &state);
    if (len == 0)
      break;

    if ((len == ICONV_BUF_TOO_SMALL) || (len == ICONV_ILLEGAL_SEQ))
      return false; // error; assume upper-case

    if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
      return false; // upper-case

    s += len;
    remaining -= len;
  }

  return true; // lower-case
}


/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is one of the code points listed in the table below
 * @retval false Any other character
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  /* Inclusive code point ranges; a single code point has first == last */
  static const struct
  {
    wchar_t first;
    wchar_t last;
  } corrupting[] = {
    { 0x00ad, 0x00ad }, /* soft hyphen */
    { 0x061c, 0x061c }, /* arabic letter mark */
    { 0x200e, 0x200f }, /* left-to-right mark, right-to-left mark */
    { 0x202a, 0x202e }, /* left-to-right embedding, right-to-left embedding,
                         * pop directional formatting, left-to-right override,
                         * right-to-left override */
    { 0x2066, 0x2069 }, /* left-to-right isolate, right-to-left isolate,
                         * first strong isolate, pop directional isolate */
    { 0xfeff, 0xfeff }, /* zero width no-break space */
  };

  for (size_t i = 0; i < (sizeof(corrupting) / sizeof(corrupting[0])); i++)
  {
    if ((wc >= corrupting[i].first) && (wc <= corrupting[i].last))
      return true;
  }

  return false;
}


/**
 * mutt_mb_filter_unprintable - Replace unprintable characters
 * @param[in,out] s String to modify
 * @retval  0 Success
 * @retval -1 s or *s is NULL
 *
 * The string is rebuilt character by character:
 * - invalid or incomplete multibyte sequences become ReplacementChar
 * - characters failing IsWPrint() become '?'
 * - if CharsetIsUtf8 is set, BOMs and display-corrupting characters are dropped
 *
 * The original string is freed and *s is pointed at the newly allocated result
 * (an allocated empty string if nothing is left).
 */
int mutt_mb_filter_unprintable(char **s)
{
  if (!s || !*s)
    return -1;

  wchar_t wc = 0;
  size_t k, k2;
  char scratch[MB_LEN_MAX + 1];
  char *p = *s;
  mbstate_t mbstate1 = { 0 };
  mbstate_t mbstate2 = { 0 };

  struct Buffer *buf = buf_pool_get();
  for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
  {
    if ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL))
    {
      /* Undecodable input: step over one byte and restart the decoder */
      k = 1;
      memset(&mbstate1, 0, sizeof(mbstate1));
      wc = ReplacementChar;
    }
    if (CharsetIsUtf8 && IsBOM(wc))
    {
      continue;
    }
    if (!IsWPrint(wc))
      wc = '?';
    else if (CharsetIsUtf8 && mutt_mb_is_display_corrupting_utf8(wc))
      continue;

    k2 = wcrtomb(scratch, wc, &mbstate2);
    if (k2 == ICONV_ILLEGAL_SEQ)
    {
      /* wc can't be encoded in the current locale.  The error value must
       * never be used as an index into scratch, so emit a '?' instead. */
      memset(&mbstate2, 0, sizeof(mbstate2));
      scratch[0] = '?';
      k2 = 1;
    }
    scratch[k2] = '\0';
    buf_addstr(buf, scratch);
  }
  FREE(s);

  if (buf_is_empty(buf))
    *s = MUTT_MEM_CALLOC(1, char); // Fake empty string
  else
    *s = buf_strdup(buf);

  buf_pool_release(&buf);
  return 0;
}


buf_is_empty
bool buf_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition buffer.c:291

buf_fix_dptr
void buf_fix_dptr(struct Buffer *buf)
Move the dptr to end of the Buffer.
Definition buffer.c:182

buf_addstr
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition buffer.c:226

buf_strdup
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition buffer.c:571

buf_alloc
void buf_alloc(struct Buffer *buf, size_t new_size)
Make sure a buffer can store at least new_size bytes.
Definition buffer.c:337

buffer.h
General purpose object for storing and parsing strings.

ctype2.h
ctype(3) wrapper functions

mutt_isspace
bool mutt_isspace(int arg)
Wrapper for isspace(3)
Definition ctype.c:96

mutt_ispunct
bool mutt_ispunct(int arg)
Wrapper for ispunct(3)
Definition ctype.c:79

mutt_mb_charlen
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition mbyte.c:56

mutt_mb_is_shell_char
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition mbyte.c:341

mutt_mb_filter_unprintable
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition mbyte.c:424

mutt_mb_width_ceiling
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition mbyte.c:238

OptLocales
bool OptLocales
(pseudo) set if user has valid locale definition
Definition mbyte.c:45

mutt_mb_get_initials
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition mbyte.c:83

mutt_mb_is_display_corrupting_utf8
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition mbyte.c:386

mutt_mb_mbstowcs
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
Convert a string from multibyte to wide characters.
Definition mbyte.c:292

mutt_mb_wcswidth
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition mbyte.c:217

mutt_mb_is_lower
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition mbyte.c:355

mutt_mb_width
int mutt_mb_width(const char *str, int col, bool indent)
Measure a string's display width (in screen columns)
Definition mbyte.c:138

buf_mb_wcstombs
void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
Convert a string from wide to multibyte characters.
Definition mbyte.c:257

mutt_mb_wcwidth
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition mbyte.c:199

mbyte.h
Multi-byte String manipulation functions.

IsBOM
#define IsBOM(wc)
Definition mbyte.h:43

IsWPrint
#define IsWPrint(wc)
Definition mbyte.h:41

memory.h
Memory management wrappers.

FREE
#define FREE(x)
Free memory and set the pointer to NULL.
Definition memory.h:68

MUTT_MEM_CALLOC
#define MUTT_MEM_CALLOC(n, type)
Definition memory.h:52

MUTT_MEM_REALLOC
#define MUTT_MEM_REALLOC(pptr, n, type)
Definition memory.h:55

CharsetIsUtf8
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition charset.c:66

ReplacementChar
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition charset.c:61

charset.h
Conversion between different character encodings.

ICONV_BUF_TOO_SMALL
#define ICONV_BUF_TOO_SMALL
Error value for iconv() - Buffer too small.
Definition charset.h:116

ICONV_ILLEGAL_SEQ
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition charset.h:114

mutt_str_len
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition string.c:500

buf_pool_get
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition pool.c:91

buf_pool_release
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition pool.c:111

pool.h
A global pool of Buffers.

string2.h
String manipulation functions.

Buffer
String manipulation buffer.
Definition buffer.h:36

Buffer::dsize
size_t dsize
Length of data.
Definition buffer.h:39

Buffer::data
char * data
Pointer to data.
Definition buffer.h:37