Namespaces
Variants
Views
Actions

Difference between revisions of "cpp/string/multibyte/mblen"

From cppreference.com
< cpp‎ | string‎ | multibyte
m
m ({{c}})
 
(10 intermediate revisions by 6 users not shown)
Line 1: Line 1:
 
{{cpp/title|mblen}}
 
{{cpp/title|mblen}}
 
{{cpp/string/multibyte/navbar}}
 
{{cpp/string/multibyte/navbar}}
{{ddcl | header=cstdlib |
+
{{ddcl|header=cstdlib|
 
int mblen( const char* s, std::size_t n );
 
int mblen( const char* s, std::size_t n );
 
}}
 
}}
  
Determines the size, in bytes, of the multibyte character whose first byte is pointed to by {{tt|s}}.  
+
Determines the size, in bytes, of the multibyte character whose first byte is pointed to by {{c|s}}.
  
If {{tt|s}} is a null pointer, resets the global conversion state and determined whether shift sequences are used.
+
If {{c|s}} is a null pointer, resets the global conversion state and determines whether shift sequences are used.
  
This function is equivalent to the call {{c|std::mbtowc((wchar_t*)0, s, n)}}, except that conversion state of {{lc|std::mbtowc}} is unaffected.
+
This function is equivalent to the call {{c|std::mbtowc(nullptr, s, n)}}, except that conversion state of {{lc|std::mbtowc}} is unaffected.
  
 
===Notes===
 
===Notes===
Line 16: Line 16:
 
===Parameters===
 
===Parameters===
 
{{par begin}}
 
{{par begin}}
{{par | s | pointer to the multibyte character}}
+
{{par|s|pointer to the multibyte character}}
{{par | n | limit on the number of bytes in s that can be examined}}
+
{{par|n|limit on the number of bytes in s that can be examined}}
 
{{par end}}
 
{{par end}}
  
 
===Return value===
 
===Return value===
If {{tt|s}} is not a null pointer, returns the number of bytes that are contained in the multibyte character or {{c|-1}} if the first bytes pointed to by {{tt|s}} do not form a valid multibyte character or {{c|0}} if {{tt|s}} is pointing at the null charcter {{c|'\0'}}.
+
If {{c|s}} is not a null pointer, returns the number of bytes that are contained in the multibyte character or {{c|-1}} if the first bytes pointed to by {{c|s}} do not form a valid multibyte character or {{c|0}} if {{c|s}} is pointing at the null character {{c|'\0'}}.
  
If {{tt|s}} is a null pointer, resets its internal conversion state to represent the initial shift state and returns {{c|0}} if the current multibyte encoding is not state-dependent (does not use shift sequences) or a non-zero value if the current multibyte encoding is state-dependent (uses shift sequences).
+
If {{c|s}} is a null pointer, resets its internal conversion state to represent the initial shift state and returns {{c|0}} if the current multibyte encoding is not state-dependent (does not use shift sequences) or a non-zero value if the current multibyte encoding is state-dependent (uses shift sequences).
  
 
===Example===
 
===Example===
 
{{example
 
{{example
|
+
|code=
| code=
+
 
#include <clocale>
 
#include <clocale>
#include <string>
 
#include <iostream>
 
 
#include <cstdlib>
 
#include <cstdlib>
 +
#include <iomanip>
 +
#include <iostream>
 
#include <stdexcept>
 
#include <stdexcept>
 +
#include <string_view>
  
 
// the number of characters in a multibyte string is the sum of mblen()'s
 
// the number of characters in a multibyte string is the sum of mblen()'s
// note: the simpler approach is std::mbstowcs(NULL, s.c_str(), s.size())
+
// note: the simpler approach is std::mbstowcs(nullptr, s.c_str(), s.size())
std::size_t strlen_mb(const std::string& s)
+
std::size_t strlen_mb(const std::string_view s)
 
{
 
{
 +
    std::mblen(nullptr, 0); // reset the conversion state
 
     std::size_t result = 0;
 
     std::size_t result = 0;
 
     const char* ptr = s.data();
 
     const char* ptr = s.data();
     const char* end = ptr + s.size();
+
     for (const char* const end = ptr + s.size(); ptr < end; ++result)
    std::mblen(NULL, 0); // reset the conversion state
+
    {
    while (ptr < end) {
+
         const int next = std::mblen(ptr, end - ptr);
         int next = std::mblen(ptr, end-ptr);
+
         if (next == -1)
         if (next == -1) {
+
 
             throw std::runtime_error("strlen_mb(): conversion error");
 
             throw std::runtime_error("strlen_mb(): conversion error");
        }
 
 
         ptr += next;
 
         ptr += next;
        ++result;
 
 
     }
 
     }
 
     return result;
 
     return result;
 +
}
 +
 +
void dump_bytes(const std::string_view str)
 +
{
 +
    std::cout << std::hex << std::uppercase << std::setfill('0');
 +
    for (unsigned char c : str)
 +
        std::cout << std::setw(2) << static_cast<int>(c) << ' ';
 +
    std::cout << std::dec << '\n';
 
}
 
}
  
Line 59: Line 65:
 
     std::setlocale(LC_ALL, "en_US.utf8");
 
     std::setlocale(LC_ALL, "en_US.utf8");
 
     // UTF-8 narrow multibyte encoding
 
     // UTF-8 narrow multibyte encoding
     std::string str = u8"z\u00df\u6c34\U0001d10b"; // or u8"zß水𝄋"
+
     const std::string_view str = "z\u00df\u6c34\U0001f34c"; // or u8"zß水🍌"
                      // or "\x7a\xc3\x9f\xe6\xb0\xb4\xf0\x9d\x84\x8b";
+
     std::cout << std::quoted(str) << " is " << strlen_mb(str)
     std::cout << str << " is " << str.size() << " bytes, but only "
+
              << " characters, but as much as " << str.size() << " bytes: ";
              << strlen_mb(str) << " characters\n";
+
    dump_bytes(str);
 
}
 
}
| output=
+
|p=true
zß水𝄋 is 10 bytes, but only 4 characters
+
|output=
 +
"zß水🍌" is 4 characters, but as much as 10 bytes: 7A C3 9F E6 B0 B4 F0 9F 8D 8C
 
}}
 
}}
  
 
===See also===
 
===See also===
 
{{dsc begin}}
 
{{dsc begin}}
{{dsc inc | cpp/string/multibyte/dsc mbtowc}}
+
{{dsc inc|cpp/string/multibyte/dsc mbtowc}}
{{dsc inc | cpp/string/multibyte/dsc mbrlen}}
+
{{dsc inc|cpp/string/multibyte/dsc mbrlen}}
{{dsc see c | c/string/multibyte/mblen}}
+
{{dsc see c|c/string/multibyte/mblen}}
 
{{dsc end}}
 
{{dsc end}}
  
[[de:cpp/string/multibyte/mblen]]
+
{{langlinks|de|es|fr|it|ja|pt|ru|zh}}
[[es:cpp/string/multibyte/mblen]]
+
[[fr:cpp/string/multibyte/mblen]]
+
[[it:cpp/string/multibyte/mblen]]
+
[[ja:cpp/string/multibyte/mblen]]
+
[[pt:cpp/string/multibyte/mblen]]
+
[[ru:cpp/string/multibyte/mblen]]
+
[[zh:cpp/string/multibyte/mblen]]
+

Latest revision as of 02:04, 9 June 2023

Defined in header <cstdlib>
int mblen( const char* s, std::size_t n );

Determines the size, in bytes, of the multibyte character whose first byte is pointed to by s.

If s is a null pointer, resets the global conversion state and determines whether shift sequences are used.

This function is equivalent to the call std::mbtowc(nullptr, s, n), except that the conversion state of std::mbtowc is unaffected.

Contents

[edit] Notes

Each call to mblen updates the internal global conversion state (a static object of type std::mbstate_t, only known to this function). If the multibyte encoding uses shift states, care must be taken to avoid backtracking or multiple scans. In any case, multiple threads should not call mblen without synchronization: std::mbrlen may be used instead.

[edit] Parameters

s - pointer to the multibyte character
n - limit on the number of bytes in s that can be examined

[edit] Return value

If s is not a null pointer, returns the number of bytes that are contained in the multibyte character, or -1 if the first bytes pointed to by s do not form a valid multibyte character, or 0 if s is pointing at the null character '\0'.

If s is a null pointer, resets its internal conversion state to represent the initial shift state and returns 0 if the current multibyte encoding is not state-dependent (does not use shift sequences) or a non-zero value if the current multibyte encoding is state-dependent (uses shift sequences).

[edit] Example

#include <clocale>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string_view>
 
// Counts the multibyte characters stored in `s` by summing the byte lengths
// reported by std::mblen() for the currently selected C locale.
// An embedded null character counts as one character.
// Throws std::runtime_error if std::mblen() reports that the bytes at the
// current position do not form a valid multibyte character.
// note: for a null-terminated string the simpler approach is
// std::mbstowcs(nullptr, str, 0)
std::size_t strlen_mb(const std::string_view s)
{
    std::mblen(nullptr, 0); // reset the conversion state
    std::size_t result = 0;
    const char* ptr = s.data();
    for (const char* const end = ptr + s.size(); ptr < end; ++result)
    {
        const int next = std::mblen(ptr, end - ptr);
        if (next == -1)
            throw std::runtime_error("strlen_mb(): conversion error");
        // mblen() returns 0 when ptr points at '\0'; that character still
        // occupies one byte, so step over it instead of looping forever
        ptr += (next == 0) ? 1 : next;
    }
    return result;
}
 
// Writes every byte of `str` to std::cout as a two-digit uppercase hex
// number followed by a space, then ends the line.
// The stream is switched back to decimal afterwards; the uppercase flag and
// the '0' fill character stay set on std::cout.
void dump_bytes(const std::string_view str)
{
    std::cout << std::setfill('0') << std::uppercase << std::hex;
    for (const char byte : str)
    {
        // go through unsigned char so bytes >= 0x80 are not sign-extended
        const int value = static_cast<unsigned char>(byte);
        std::cout << std::setw(2) << value << ' ';
    }
    std::cout << std::dec << '\n';
}
 
// Demonstrates strlen_mb() on a UTF-8 string: prints the quoted string, its
// length in characters and in bytes, and a hex dump of those bytes.
// Returns EXIT_FAILURE if the UTF-8 locale cannot be selected.
int main()
{
    // allow mblen() to work with UTF-8 multibyte encoding
    if (std::setlocale(LC_ALL, "en_US.utf8") == nullptr)
    {
        // setlocale() returns a null pointer on failure and leaves the
        // locale unchanged, so mblen() would not be decoding UTF-8 below
        std::cerr << "cannot select locale en_US.utf8\n";
        return EXIT_FAILURE;
    }
    // UTF-8 narrow multibyte encoding
    const std::string_view str = "z\u00df\u6c34\U0001f34c"; // or u8"zß水🍌"
    std::cout << std::quoted(str) << " is " << strlen_mb(str)
              << " characters, but as much as " << str.size() << " bytes: ";
    dump_bytes(str);
}

Possible output:

"zß水🍌" is 4 characters, but as much as 10 bytes: 7A C3 9F E6 B0 B4 F0 9F 8D 8C

[edit] See also

converts the next multibyte character to wide character
(function) [edit]
returns the number of bytes in the next multibyte character, given state
(function) [edit]
C documentation for mblen