Difference between revisions of "cpp/string/multibyte/mbrlen"
From cppreference.com
YexuanXiao (Talk | contribs) m |
(rv removal of the breakdown into the three utf-8 bytes. Also +basic use case in the example) |
||
Line 38: | Line 38: | ||
std::setlocale(LC_ALL, "en_US.utf8"); | std::setlocale(LC_ALL, "en_US.utf8"); | ||
// UTF-8 narrow multibyte encoding | // UTF-8 narrow multibyte encoding | ||
− | std::string str = " | + | std::string str = "水"; // or u8"\u6c34" or "\xe6\xb0\xb4" |
std::mbstate_t mb = std::mbstate_t(); | std::mbstate_t mb = std::mbstate_t(); | ||
+ | |||
+ | // simple use: length of a complete multibyte character | ||
+ | int len = std::mbrlen(&str[0], str.size(), &mb); | ||
+ | std::cout << "The length of " << str << " is " << len << " bytes\n"; | ||
+ | |||
+ | // advanced use: restarting in the middle of a multibyte character | ||
int len1 = std::mbrlen(&str[0], 1, &mb); | int len1 = std::mbrlen(&str[0], 1, &mb); | ||
if (len1 == -2) | if (len1 == -2) | ||
std::cout << "The first 1 byte of " << str | std::cout << "The first 1 byte of " << str | ||
<< " is an incomplete multibyte char (mbrlen returns -2)\n"; | << " is an incomplete multibyte char (mbrlen returns -2)\n"; | ||
+ | |||
int len2 = std::mbrlen(&str[1], str.size() - 1, &mb); | int len2 = std::mbrlen(&str[1], str.size() - 1, &mb); | ||
std::cout << "The remaining " << str.size() - 1 << " bytes of " << str | std::cout << "The remaining " << str.size() - 1 << " bytes of " << str | ||
<< " hold " << len2 << " bytes of the multibyte character\n"; | << " hold " << len2 << " bytes of the multibyte character\n"; | ||
+ | |||
+ | // error case: | ||
std::cout << "Attempting to call mbrlen() in the middle of " << str | std::cout << "Attempting to call mbrlen() in the middle of " << str | ||
<< " while in initial shift state returns " | << " while in initial shift state returns " | ||
Line 53: | Line 62: | ||
} | } | ||
|output= | |output= | ||
+ | The length of 水 is 3 bytes. | ||
The first 1 byte of 水 is an incomplete multibyte char (mbrlen returns -2) | The first 1 byte of 水 is an incomplete multibyte char (mbrlen returns -2) | ||
The remaining 2 bytes of 水 hold 2 bytes of the multibyte character | The remaining 2 bytes of 水 hold 2 bytes of the multibyte character |
Revision as of 10:41, 3 October 2024
Defined in header <cwchar>
|
||
std::size_t mbrlen( const char* s, std::size_t n, std::mbstate_t* ps); |
||
Determines the size, in bytes, of the remainder of the multibyte character whose first byte is pointed to by s, given the current conversion state ps.
This function is equivalent to the call std::mbrtowc(nullptr, s, n, ps ? ps : &internal) for some hidden object internal
of type std::mbstate_t, except that the expression ps is evaluated only once.
Contents |
Parameters
s | - | pointer to an element of a multibyte character string |
n | - | limit on the number of bytes in s that can be examined |
ps | - | pointer to the variable holding the conversion state |
Return value
0 if the next n or fewer bytes complete the null character.
The number of bytes (between 1 and n) that complete a valid multibyte character.
(size_t) - 1 if encoding error occurs.
(size_t) - 2 if the next n bytes are part of a possibly valid multibyte character, which is still incomplete after examining all n bytes.
Example
Run this code
#include <clocale> #include <cwchar> #include <iostream> #include <string> int main() { // allow mbrlen() to work with UTF-8 multibyte encoding std::setlocale(LC_ALL, "en_US.utf8"); // UTF-8 narrow multibyte encoding std::string str = "水"; // or u8"\u6c34" or "\xe6\xb0\xb4" std::mbstate_t mb = std::mbstate_t(); // simple use: length of a complete multibyte character int len = std::mbrlen(&str[0], str.size(), &mb); std::cout << "The length of " << str << " is " << len << " bytes\n"; // advanced use: restarting in the middle of a multibyte character int len1 = std::mbrlen(&str[0], 1, &mb); if (len1 == -2) std::cout << "The first 1 byte of " << str << " is an incomplete multibyte char (mbrlen returns -2)\n"; int len2 = std::mbrlen(&str[1], str.size() - 1, &mb); std::cout << "The remaining " << str.size() - 1 << " bytes of " << str << " hold " << len2 << " bytes of the multibyte character\n"; // error case: std::cout << "Attempting to call mbrlen() in the middle of " << str << " while in initial shift state returns " << (int)mbrlen(&str[1], str.size(), &mb) << '\n'; }
Output:
The length of 水 is 3 bytes. The first 1 byte of 水 is an incomplete multibyte char (mbrlen returns -2) The remaining 2 bytes of 水 hold 2 bytes of the multibyte character Attempting to call mbrlen() in the middle of 水 while in initial shift state returns -1
See also
converts the next multibyte character to wide character, given state (function) | |
returns the number of bytes in the next multibyte character (function) | |
[virtual] |
calculates the length of the ExternT string that would be consumed by conversion into given InternT buffer (virtual protected member function of std::codecvt<InternT,ExternT,StateT> )
|
C documentation for mbrlen
|