Difference between revisions of "cpp/regex/regex search"
Andreas Krug (Talk | contribs) m (<cstddef> for std::size_t, auto & -> auto&) |
(Added LWG issue #2205 DR (part 2/2).) |
||
Line 4: | Line 4: | ||
{{dcl header|regex}} | {{dcl header|regex}} | ||
{{dcl|num=1|since=c++11|1= | {{dcl|num=1|since=c++11|1= | ||
− | template< class BidirIt, | + | template< class BidirIt, class Alloc, class CharT, class Traits > |
− | + | ||
bool regex_search( BidirIt first, BidirIt last, | bool regex_search( BidirIt first, BidirIt last, | ||
− | + | std::match_results<BidirIt, Alloc>& m, | |
− | + | const std::basic_regex<CharT, Traits>& e, | |
− | + | std::regex_constants::match_flag_type flags = | |
− | + | std::regex_constants::match_default ); | |
}} | }} | ||
{{dcl|num=2|since=c++11|1= | {{dcl|num=2|since=c++11|1= | ||
+ | template< class BidirIt, class CharT, class Traits > | ||
+ | bool regex_search( BidirIt first, BidirIt last, | ||
+ | const std::basic_regex<CharT, Traits>& e, | ||
+ | std::regex_constants::match_flag_type flags = | ||
+ | std::regex_constants::match_default ); | ||
+ | }} | ||
+ | {{dcl|num=3|since=c++11|1= | ||
template< class CharT, class Alloc, class Traits > | template< class CharT, class Alloc, class Traits > | ||
bool regex_search( const CharT* str, | bool regex_search( const CharT* str, | ||
− | + | std::match_results<const CharT*, Alloc>& m, | |
− | + | const std::basic_regex<CharT, Traits>& e, | |
− | + | std::regex_constants::match_flag_type flags = | |
− | + | std::regex_constants::match_default ); | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
}} | }} | ||
{{dcl|num=4|since=c++11|1= | {{dcl|num=4|since=c++11|1= | ||
− | template< | + | template< class CharT, class Traits > |
− | + | bool regex_search( const CharT* str, const std::basic_regex<CharT, Traits>& e, | |
− | bool regex_search( | + | std::regex_constants::match_flag_type flags = |
− | + | std::regex_constants::match_default ); | |
− | + | ||
− | + | ||
}} | }} | ||
{{dcl|num=5|since=c++11|1= | {{dcl|num=5|since=c++11|1= | ||
− | template< class CharT, class Traits > | + | template< class STraits, class SAlloc, class Alloc, |
− | bool regex_search( const CharT | + | class CharT, class Traits > |
− | + | bool regex_search | |
− | + | ( const std::basic_string<CharT, STraits, SAlloc>& s, | |
− | + | std::match_results | |
+ | <typename std::basic_string<CharT, STraits, SAlloc>::const_iterator, | ||
+ | Alloc>& m, | ||
+ | const std::basic_regex<CharT, Traits>& e, | ||
+ | std::regex_constants::match_flag_type flags = | ||
+ | std::regex_constants::match_default ); | ||
}} | }} | ||
{{dcl|num=6|since=c++11|1= | {{dcl|num=6|since=c++11|1= | ||
− | template< class STraits, class SAlloc, | + | template< class STraits, class SAlloc, class CharT, class Traits > |
− | + | bool regex_search( const std::basic_string<CharT, STraits, SAlloc>& s, | |
− | bool regex_search( const std::basic_string<CharT,STraits,SAlloc>& s, | + | const std::basic_regex<CharT, Traits>& e, |
− | + | std::regex_constants::match_flag_type flags = | |
− | + | std::regex_constants::match_default ); | |
− | + | ||
}} | }} | ||
{{dcl|num=7|since=c++11|1= | {{dcl|num=7|since=c++11|1= | ||
− | template< class STraits, class SAlloc, | + | template< class STraits, class SAlloc, class Alloc, |
− | + | class CharT, class Traits > | |
− | bool regex_search( const std::basic_string<CharT,STraits,SAlloc>&&, | + | bool regex_search |
− | + | ( const std::basic_string<CharT, STraits, SAlloc>&&, | |
− | + | std::match_results | |
− | + | <typename std::basic_string<CharT, STraits, SAlloc>::const_iterator, | |
− | + | Alloc>&, | |
− | + | const std::basic_regex<CharT, Traits>&, | |
− | + | std::regex_constants::match_flag_type flags = | |
− | + | std::regex_constants::match_default ) = delete; | |
}} | }} | ||
{{dcl end}} | {{dcl end}} | ||
− | Determines if there is a match between the regular expression {{c|e}} and some subsequence in the target character sequence. | + | Determines if there is a match between the regular expression {{c|e}} and some subsequence in the target character sequence. The detailed match result is stored in {{c|m}} (if present). |
− | @1@ | + | @1,2@ The target character sequence is represented by the range {{range|first|last}}. |
+ | {{rev begin}} | ||
+ | {{rev|until=c++23| | ||
+ | If {{tt|BidirIt}} does not satisfy the requirements of {{named req|BidirectionalIterator}}, the behavior is undefined. | ||
+ | }} | ||
+ | {{rev|since=c++23| | ||
+ | If {{tt|BidirIt}} does not model {{lconcept|bidirectional_iterator}}, the behavior is undefined. | ||
+ | }} | ||
+ | {{rev end}} | ||
− | @ | + | @3,4@ The target character sequence is represented by the range {{range|str|str + std::char_traits<CharT>::length(str)}}. |
− | @ | + | @5,6@ The target character sequence is represented by the string {{c|s}}. |
− | @ | + | @7@ The target character sequence cannot be represented by a {{lc|std::string}} rvalue. |
− | + | If a match does not exist, the following expressions involving {{c|m}} (if present) should yield the specified values: |
+ | {|class="wikitable" style="text-align: center;" | ||
+ | !{{nbsp}}Expression{{nbsp}} | ||
+ | !{{nbsp}}Value{{nbsp}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/ready}}{{c/core|()}}}} | ||
+ | |{{c|true}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/size}}{{c/core|()}}}} | ||
+ | |{{c|0}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/empty}}{{c/core|()}}}} | ||
+ | |{{c|true}} | ||
+ | |} | ||
− | {{ | + | If a match exists, given any integer in {{open range|0|m.size()}} as {{c|n}}, the following expressions involving {{c|m}} should yield the specified values for each overload listed below: |
+ | {|class="wikitable" style="text-align: center;" | ||
+ | !rowspan=2|{{nbsp|6}}Expression{{nbsp|6}} | ||
+ | !colspan=3|Value | ||
+ | |- | ||
+ | !{{nbsp|11}}Overload {{v|1}}{{nbsp|11}} | ||
+ | !{{nbsp|11}}Overload {{v|3}}{{nbsp|11}} | ||
+ | !{{nbsp|11}}Overload {{v|5}}{{nbsp|11}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/ready}}{{c/core|()}}}} | ||
+ | |colspan=3|{{c|true}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/size}}{{c/core|()}}}} | ||
+ | |colspan=3|{{box|{{c/core|1 + e.}}{{ltt|cpp/regex/basic_regex/mark_count}}{{c/core|()}}}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/empty}}{{c/core|()}}}} | ||
+ | |colspan=3|{{c|false}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/prefix}}{{c/core|().first}}}} | ||
+ | |{{c|first}} | ||
+ | |{{c|str}} | ||
+ | |{{c|s.begin()}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/prefix}}{{c/core|().second}}}} | ||
+ | |colspan=3|{{c|m[0].first}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/prefix}}{{c/core|().matched}}}} | ||
+ | |colspan=3|{{c|1=m.prefix().first != m.prefix().second}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/suffix}}{{c/core|().first}}}} | ||
+ | |colspan=3|{{c|m[0].second}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/suffix}}{{c/core|().second}}}} | ||
+ | |{{c|last}} | ||
+ | |<span style="text-align: start;">{{c multi|std::char_traits<CharT>::| length(str) + str}}</span> | ||
+ | |{{c|s.end()}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m.}}{{ltt|cpp/regex/match_results/suffix}}{{c/core|().matched}}}} | ||
+ | |colspan=3|{{c|1=m.suffix().first != m.suffix().second}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m}}{{ltt|cpp/regex/match_results/operator at|[0]}}{{c/core|.first}}}} | ||
+ | |colspan=3|the start of the sequence that matched {{c|e}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m}}{{ltt|cpp/regex/match_results/operator at|[0]}}{{c/core|.second}}}} | ||
+ | |colspan=3|the end of the sequence that matched {{c|e}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m}}{{ltt|cpp/regex/match_results/operator at|[0]}}{{c/core|.matched}}}} | ||
+ | |colspan=3|{{c|true}} | ||
+ | |- | ||
+ | |{{box|{{c/core|m}}{{ltt|cpp/regex/match_results/operator at|[n]}}{{c/core|.first}}}} | ||
+ | |colspan=3| | ||
+ | * {{c|last}} if [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] {{c|n}} did not participate in the match | ||
+ | * the start of the sequence matching sub-expression {{c|n}} otherwise | ||
+ | |- | ||
+ | |{{box|{{c/core|m}}{{ltt|cpp/regex/match_results/operator at|[n]}}{{c/core|.second}}}} | ||
+ | |colspan=3| | ||
+ | * {{c|last}} if [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] {{c|n}} did not participate in the match | ||
+ | * the end of the sequence matching sub-expression {{c|n}} otherwise | ||
+ | |- | ||
+ | |{{box|{{c/core|m}}{{ltt|cpp/regex/match_results/operator at|[n]}}{{c/core|.matched}}}} | ||
+ | |colspan=3| | ||
+ | * {{c|false}} if [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] {{c|n}} did not participate in the match | ||
+ | * {{c|true}} otherwise | ||
+ | |} | ||
===Parameters=== | ===Parameters=== | ||
{{par begin}} | {{par begin}} | ||
− | {{par|first, last| | + | {{par|first, last|the target character range}} |
− | {{par|str| | + | {{par|str|the target null-terminated C-style string}} |
− | {{par|s| | + | {{par|s|the target {{lc|std::basic_string}}}} |
− | + | ||
{{par|m|the match results}} | {{par|m|the match results}} | ||
− | {{par| | + | {{par|e|the regular expression}} |
− | {{par | + | {{par|flags|flags used to determine how the match will be performed}} |
− | + | ||
− | + | ||
{{par end}} | {{par end}} | ||
===Return value=== | ===Return value=== | ||
− | Returns {{c|true}} if a match exists, {{c|false}} otherwise. | + | Returns {{c|true}} if a match exists, {{c|false}} otherwise. |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
===Notes=== | ===Notes=== | ||
Line 141: | Line 192: | ||
"violets are #0000ff", | "violets are #0000ff", | ||
"all of my base are belong to you"}; | "all of my base are belong to you"}; | ||
− | + | ||
std::regex color_regex("#([a-f0-9]{2})" | std::regex color_regex("#([a-f0-9]{2})" | ||
"([a-f0-9]{2})" | "([a-f0-9]{2})" | ||
"([a-f0-9]{2})"); | "([a-f0-9]{2})"); | ||
− | + | ||
// simple match | // simple match | ||
for (const auto& line : lines) | for (const auto& line : lines) | ||
Line 151: | Line 202: | ||
<< std::regex_search(line, color_regex) << '\n'; | << std::regex_search(line, color_regex) << '\n'; | ||
std::cout << '\n'; | std::cout << '\n'; | ||
− | + | ||
// show contents of marked subexpressions within each match | // show contents of marked subexpressions within each match | ||
std::smatch color_match; | std::smatch color_match; | ||
Line 163: | Line 214: | ||
std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n"; | std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n"; | ||
} | } | ||
− | + | ||
// repeated search (see also std::regex_iterator) | // repeated search (see also std::regex_iterator) | ||
std::string log(R"( | std::string log(R"( | ||
Line 178: | Line 229: | ||
log = sm.suffix(); | log = sm.suffix(); | ||
} | } | ||
− | + | ||
// C-style string demo | // C-style string demo | ||
std::cmatch cm; | std::cmatch cm; | ||
Line 215: | Line 266: | ||
===Defect reports=== | ===Defect reports=== | ||
{{dr list begin}} | {{dr list begin}} | ||
− | {{dr list item|wg=lwg|dr=2329|std=C++11|before={{tt|basic_string}} rvalues | + | {{dr list item|wg=lwg|dr=2205|std=C++11|before={{c|n}} could be zero in the postcondition|after=can only be positive}} |
+ | {{dr list item|wg=lwg|dr=2329|std=C++11|before=overload {{v|5}} accepted {{tt|basic_string}} rvalues,<br>which could result in dangling iterators|after=rejected via deleted overload {{v|7}}}} | ||
{{dr list end}} | {{dr list end}} | ||
Revision as of 19:49, 14 November 2024
Defined in header <regex>
|
||
template< class BidirIt, class Alloc, class CharT, class Traits > bool regex_search( BidirIt first, BidirIt last, |
(1) | (since C++11) |
template< class BidirIt, class CharT, class Traits > bool regex_search( BidirIt first, BidirIt last, |
(2) | (since C++11) |
template< class CharT, class Alloc, class Traits > bool regex_search( const CharT* str, |
(3) | (since C++11) |
template< class CharT, class Traits > bool regex_search( const CharT* str, const std::basic_regex<CharT, Traits>& e, |
(4) | (since C++11) |
template< class STraits, class SAlloc, class Alloc, class CharT, class Traits > |
(5) | (since C++11) |
template< class STraits, class SAlloc, class CharT, class Traits > bool regex_search( const std::basic_string<CharT, STraits, SAlloc>& s, |
(6) | (since C++11) |
template< class STraits, class SAlloc, class Alloc, class CharT, class Traits > |
(7) | (since C++11) |
Determines if there is a match between the regular expression e and some subsequence in the target character sequence. The detailed match result is stored in m (if present).
1,2) The target character sequence is represented by the range [first, last).
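If BidirIt does not satisfy the requirements of BidirectionalIterator, the behavior is undefined. |
(until C++23) |
If BidirIt does not model bidirectional_iterator, the behavior is undefined. |
(since C++23) |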
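3,4) The target character sequence is represented by the range [str, str + std::char_traits<CharT>::length(str)).
5,6) The target character sequence is represented by the string s.
7) The target character sequence cannot be represented by a std::string rvalue.
If a match does not exist, the following expressions involving m (if present) should yield the specified values: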
Expression | Value |
---|---|
m.ready() | true |
m.size() | 0 |
m.empty() | true |
If a match exists, given any integer in (0, m.size()) as n, the following expressions involving m should yield the specified values for each overload listed below:
Expression | Value | ||
---|---|---|---|
Overload (1) | Overload (3) | Overload (5) | |
m.ready() | true | ||
m.size() | 1 + e.mark_count() | ||
m.empty() | false | ||
m.prefix().first | first | str | s.begin() |
m.prefix().second | m[0].first | ||
m.prefix().matched | m.prefix().first != m.prefix().second | ||
m.suffix().first | m[0].second | ||
m.suffix().second | last | std::char_traits<CharT>:: length(str) + str |
s.end() |
m.suffix().matched | m.suffix().first != m.suffix().second | ||
m[0].first | the start of the sequence that matched e | ||
m[0].second | the end of the sequence that matched e | ||
m[0].matched | true | ||
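m[n].first | last if marked sub-expression n did not participate in the match; the start of the sequence matching sub-expression n otherwise | ||
m[n].second | last if marked sub-expression n did not participate in the match; the end of the sequence matching sub-expression n otherwise | ||
m[n].matched | false if marked sub-expression n did not participate in the match; true otherwise | ||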
Contents |
Parameters
first, last | - | the target character range |
str | - | the target null-terminated C-style string |
s | - | the target std::basic_string |
m | - | the match results |
e | - | the regular expression |
flags | - | flags used to determine how the match will be performed |
Return value
Returns true if a match exists, false otherwise.
Notes
In order to examine all matches within the target sequence, std::regex_search
may be called in a loop, restarting each time from m[0].second of the previous call. std::regex_iterator offers an easy interface to this iteration.
Example
#include <cstddef>
#include <iostream>
#include <regex>
#include <string>

// Demonstrates std::regex_search in four ways:
//   1. a yes/no search on std::string targets,
//   2. inspecting prefix, sub-matches and suffix of a match,
//   3. repeated searching by continuing from the suffix of the last match,
//   4. searching a null-terminated C-style string.
int main()
{
    std::string lines[] = {"Roses are #ff0000",
                           "violets are #0000ff",
                           "all of my base are belong to you"};

    // '#' followed by three marked groups of two lowercase hex digits each.
    std::regex color_regex("#([a-f0-9]{2})"
                           "([a-f0-9]{2})"
                           "([a-f0-9]{2})");

    // simple match
    for (const auto& line : lines)
        std::cout << line << ": " << std::boolalpha
                  << std::regex_search(line, color_regex) << '\n';
    std::cout << '\n';

    // show contents of marked subexpressions within each match
    std::smatch color_match;
    for (const auto& line : lines)
        if (std::regex_search(line, color_match, color_regex))
        {
            std::cout << "matches for '" << line << "'\n";
            std::cout << "Prefix: '" << color_match.prefix() << "'\n";
            for (std::size_t i = 0; i < color_match.size(); ++i)
                std::cout << i << ": " << color_match[i] << '\n';
            std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n";
        }

    // repeated search (see also std::regex_iterator)
    // The field separator is written as an explicit "\t" escape: the pattern
    // below requires a tab after "Speed:", and a literal tab inside a raw
    // string is easily lost when the text is copied or reformatted.
    std::string log("\n"
                    "Speed:\t366\n"
                    "Mass:\t35\n"
                    "Speed:\t378\n"
                    "Mass:\t32\n"
                    "Speed:\t400\n"
                    "Mass:\t30");
    std::regex r(R"(Speed:\t\d*)");
    for (std::smatch sm; regex_search(log, sm, r);)
    {
        std::cout << sm.str() << '\n';
        // Continue with the text after the current match; sm is refilled by
        // the next regex_search call before it is read again.
        log = sm.suffix();
    }

    // C-style string demo
    std::cmatch cm;
    if (std::regex_search("this is a test", cm, std::regex("test")))
        std::cout << "\nFound " << cm[0] << " at position "
                  << cm.prefix().length() << '\n';
}
Output:
Roses are #ff0000: true violets are #0000ff: true all of my base are belong to you: false matches for 'Roses are #ff0000' Prefix: 'Roses are ' 0: #ff0000 1: ff 2: 00 3: 00 Suffix: '' matches for 'violets are #0000ff' Prefix: 'violets are ' 0: #0000ff 1: 00 2: 00 3: ff Suffix: '' Speed: 366 Speed: 378 Speed: 400 Found test at position 10
Defect reports
The following behavior-changing defect reports were applied retroactively to previously published C++ standards.
DR | Applied to | Behavior as published | Correct behavior |
---|---|---|---|
LWG 2205 | C++11 | n could be zero in the postcondition | can only be positive |
LWG 2329 | C++11 | overload (5) accepted basic_string rvalues, which could result in dangling iterators |
rejected via deleted overload (7) |
See also
(C++11) |
regular expression object (class template) |
(C++11) |
identifies one regular expression match, including all sub-expression matches (class template) |
(C++11) |
attempts to match a regular expression to an entire character sequence (function template) |