Difference between revisions of "cpp/regex/regex search"
Andreas Krug (Talk | contribs) m ([first,last) -> [first, last), headers sorted, fmt) |
Andreas Krug (Talk | contribs) m (fmt, {{c}}, {{range}}) |
||
Line 2: | Line 2: | ||
{{cpp/regex/navbar}} | {{cpp/regex/navbar}} | ||
{{dcl begin}} | {{dcl begin}} | ||
− | {{dcl header | regex}} | + | {{dcl header|regex}} |
− | {{dcl | num=1 | since=c++11 | 1= | + | {{dcl|num=1|since=c++11|1= |
template< class BidirIt, | template< class BidirIt, | ||
class Alloc, class CharT, class Traits > | class Alloc, class CharT, class Traits > | ||
Line 12: | Line 12: | ||
std::regex_constants::match_default ); | std::regex_constants::match_default ); | ||
}} | }} | ||
− | {{dcl | num=2 | since=c++11 | 1= | + | {{dcl|num=2|since=c++11|1= |
template< class CharT, class Alloc, class Traits > | template< class CharT, class Alloc, class Traits > | ||
bool regex_search( const CharT* str, | bool regex_search( const CharT* str, | ||
Line 20: | Line 20: | ||
std::regex_constants::match_default ); | std::regex_constants::match_default ); | ||
}} | }} | ||
− | {{dcl | num=3 | since=c++11 | 1= | + | {{dcl|num=3|since=c++11|1= |
template< class STraits, class SAlloc, | template< class STraits, class SAlloc, | ||
class Alloc, class CharT, class Traits > | class Alloc, class CharT, class Traits > | ||
Line 32: | Line 32: | ||
std::regex_constants::match_default ); | std::regex_constants::match_default ); | ||
}} | }} | ||
− | {{dcl | num=4 | since=c++11 | 1= | + | {{dcl|num=4|since=c++11|1= |
template< class BidirIt, | template< class BidirIt, | ||
class CharT, class Traits > | class CharT, class Traits > | ||
Line 40: | Line 40: | ||
std::regex_constants::match_default ); | std::regex_constants::match_default ); | ||
}} | }} | ||
− | {{dcl | num=5 | since=c++11 | 1= | + | {{dcl|num=5|since=c++11|1= |
template< class CharT, class Traits > | template< class CharT, class Traits > | ||
bool regex_search( const CharT* str, | bool regex_search( const CharT* str, | ||
Line 47: | Line 47: | ||
std::regex_constants::match_default ); | std::regex_constants::match_default ); | ||
}} | }} | ||
− | {{dcl | num=6 | since=c++11 | 1= | + | {{dcl|num=6|since=c++11|1= |
template< class STraits, class SAlloc, | template< class STraits, class SAlloc, | ||
class CharT, class Traits > | class CharT, class Traits > | ||
Line 55: | Line 55: | ||
std::regex_constants::match_default ); | std::regex_constants::match_default ); | ||
}} | }} | ||
− | {{dcl | num=7 | since=c++11 | 1= | + | {{dcl|num=7|since=c++11|1= |
template< class STraits, class SAlloc, | template< class STraits, class SAlloc, | ||
class Alloc, class CharT, class Traits > | class Alloc, class CharT, class Traits > | ||
Line 69: | Line 69: | ||
{{dcl end}} | {{dcl end}} | ||
− | Determines if there is a match between the regular expression {{ | + | Determines if there is a match between the regular expression {{c|e}} and some subsequence in the target character sequence. |
− | @1@ Analyzes generic range {{ | + | @1@ Analyzes generic range {{range|first|last}}. Match results are returned in {{c|m}}. |
− | @2@ Analyzes a null-terminated string pointed to by {{ | + | @2@ Analyzes a null-terminated string pointed to by {{c|str}}. Match results are returned in {{c|m}}. |
− | @3@ Analyzes a string {{ | + | @3@ Analyzes a string {{c|s}}. Match results are returned in {{c|m}}. |
@4-6@ Equivalent to {{v|1-3}}, just omits the match results. | @4-6@ Equivalent to {{v|1-3}}, just omits the match results. | ||
− | @7@ The overload {{v|3}} is prohibited from accepting temporary strings, otherwise this function populates {{tt|match_results}} {{ | + | @7@ The overload {{v|3}} is prohibited from accepting temporary strings, otherwise this function populates {{tt|match_results}} {{c|m}} with string iterators that become invalid immediately. |
{{tt|regex_search}} will successfully match any subsequence of the given sequence, whereas {{lc|std::regex_match}} will only return {{c|true}} if the regular expression matches the ''entire'' sequence. | {{tt|regex_search}} will successfully match any subsequence of the given sequence, whereas {{lc|std::regex_match}} will only return {{c|true}} if the regular expression matches the ''entire'' sequence. | ||
Line 85: | Line 85: | ||
===Parameters=== | ===Parameters=== | ||
{{par begin}} | {{par begin}} | ||
− | {{par | first, last | a range identifying the target character sequence}} | + | {{par|first, last|a range identifying the target character sequence}} |
− | {{par | str | a pointer to a null-terminated target character sequence}} | + | {{par|str|a pointer to a null-terminated target character sequence}} |
− | {{par | s | a string identifying target character sequence}} | + | {{par|s|a string identifying target character sequence}} |
− | {{par | e | the {{lc|std::regex}} that should be applied to the target character sequence}} | + | {{par|e|the {{lc|std::regex}} that should be applied to the target character sequence}} |
− | {{par | m | the match results}} | + | {{par|m|the match results}} |
− | {{par | flags | {{lc|std::regex_constants::match_flag_type}} governing search behavior}} | + | {{par|flags|{{lc|std::regex_constants::match_flag_type}} governing search behavior}} |
{{par hreq}} | {{par hreq}} | ||
− | {{par req named | BidirIt | BidirectionalIterator}} | + | {{par req named|BidirIt|BidirectionalIterator}} |
− | {{par req named | Alloc | Allocator}} | + | {{par req named|Alloc|Allocator}} |
{{par end}} | {{par end}} | ||
===Return value=== | ===Return value=== | ||
− | Returns {{c|true}} if a match exists, {{c|false}} otherwise. In either case, the object {{ | + | Returns {{c|true}} if a match exists, {{c|false}} otherwise. In either case, the object {{c|m}} is updated, as follows: |
If the match does not exist: | If the match does not exist: | ||
{{dsc begin}} | {{dsc begin}} | ||
− | {{dsc | {{c|1=m.ready() == true}} }} | + | {{dsc|{{c|1=m.ready() == true}}}} |
− | {{dsc | {{c|1=m.empty() == true}} }} | + | {{dsc|{{c|1=m.empty() == true}}}} |
− | {{dsc | {{c|1=m.size() == 0}} }} | + | {{dsc|{{c|1=m.size() == 0}}}} |
{{dsc end}} | {{dsc end}} | ||
If the match exists: | If the match exists: | ||
{{dsc begin}} | {{dsc begin}} | ||
− | {{dsc | {{c|m.ready()}} | {{c|true}} }} | + | {{dsc|{{c|m.ready()}}|{{c|true}}}} |
− | {{dsc | {{c|m.empty()}} | {{c|false}} }} | + | {{dsc|{{c|m.empty()}}|{{c|false}}}} |
− | {{dsc | {{c|m.size()}} | + | {{dsc|{{c|m.size()}}|number of [[cpp/regex/ecmascript#Sub-expressions|marked subexpressions]] plus 1, that is, {{c|1 + e.mark_count()}}}} |
− | {{dsc | {{c|m.prefix().first}} | {{ | + | {{dsc|{{c|m.prefix().first}}|{{c|first}}}} |
− | {{dsc | {{c|m.prefix().second}} | {{c|m[0].first}} }} | + | {{dsc|{{c|m.prefix().second}}|{{c|m[0].first}}}} |
− | {{dsc | {{c|m.prefix().matched}} | {{c|m.prefix().first !{{=}} m.prefix().second}} }} | + | {{dsc|{{c|m.prefix().matched}}|{{c|m.prefix().first !{{=}} m.prefix().second}}}} |
− | {{dsc | {{c|m.suffix().first}} | {{c|m[0].second}}}} | + | {{dsc|{{c|m.suffix().first}}|{{c|m[0].second}}}} |
− | {{dsc | {{c|m.suffix().second}} | {{ | + | {{dsc|{{c|m.suffix().second}}|{{c|last}}}} |
− | {{dsc | {{c|m.suffix().matched}} | {{c|m.suffix().first !{{=}} m.suffix().second}} }} | + | {{dsc|{{c|m.suffix().matched}}|{{c|m.suffix().first !{{=}} m.suffix().second}}}} |
− | {{dsc | {{c|m[0].first}} | the start of the matching sequence }} | + | {{dsc|{{c|m[0].first}}|the start of the matching sequence}} |
− | {{dsc | {{c|m[0].second}} | the end of the matching sequence }} | + | {{dsc|{{c|m[0].second}}|the end of the matching sequence}} |
− | {{dsc | {{c|m[0].matched}} | {{c|true}}}} | + | {{dsc|{{c|m[0].matched}}|{{c|true}}}} |
− | {{dsc | {{c|1=m[n].first}} | the start of the sequence that matched [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] n, or {{ | + | {{dsc|{{c|1=m[n].first}}|the start of the sequence that matched [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] n, or {{c|last}} if the subexpression did not participate in the match}} |
− | {{dsc | {{c|1=m[n].second}} | the end of the sequence that matched [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] n, or {{ | + | {{dsc|{{c|1=m[n].second}}|the end of the sequence that matched [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] n, or {{c|last}} if the subexpression did not participate in the match}} |
− | {{dsc | {{c|1=m[n].matched}} | {{c|true}} if sub-expression n participated in the match, {{c|false}} otherwise}} | + | {{dsc|{{c|1=m[n].matched}}|{{c|true}} if sub-expression n participated in the match, {{c|false}} otherwise}} |
{{dsc end}} | {{dsc end}} | ||
===Notes=== | ===Notes=== | ||
− | In order to examine all matches within the target sequence, {{tt|std::regex_search}} may be called in a loop, restarting each time from {{ | + | In order to examine all matches within the target sequence, {{tt|std::regex_search}} may be called in a loop, restarting each time from {{c|m[0].second}} of the previous call. {{lc|std::regex_iterator}} offers an easy interface to this iteration. |
===Example=== | ===Example=== | ||
{{example | {{example | ||
− | + | |code= | |
#include <iostream> | #include <iostream> | ||
#include <regex> | #include <regex> | ||
Line 146: | Line 146: | ||
// simple match | // simple match | ||
− | for (const auto &line : lines) | + | for (const auto &line : lines) |
std::cout << line << ": " << std::boolalpha | std::cout << line << ": " << std::boolalpha | ||
<< std::regex_search(line, color_regex) << '\n'; | << std::regex_search(line, color_regex) << '\n'; | ||
− | |||
std::cout << '\n'; | std::cout << '\n'; | ||
// show contents of marked subexpressions within each match | // show contents of marked subexpressions within each match | ||
std::smatch color_match; | std::smatch color_match; | ||
− | for (const auto& line : lines) | + | for (const auto& line : lines) |
− | if(std::regex_search(line, color_match, color_regex)) { | + | if (std::regex_search(line, color_match, color_regex)) |
+ | { | ||
std::cout << "matches for '" << line << "'\n"; | std::cout << "matches for '" << line << "'\n"; | ||
std::cout << "Prefix: '" << color_match.prefix() << "'\n"; | std::cout << "Prefix: '" << color_match.prefix() << "'\n"; | ||
Line 162: | Line 162: | ||
std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n"; | std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n"; | ||
} | } | ||
− | |||
// repeated search (see also std::regex_iterator) | // repeated search (see also std::regex_iterator) | ||
Line 174: | Line 173: | ||
std::regex r(R"(Speed:\t\d*)"); | std::regex r(R"(Speed:\t\d*)"); | ||
std::smatch sm; | std::smatch sm; | ||
− | while(regex_search(log, sm, r)) | + | while (regex_search(log, sm, r)) |
{ | { | ||
std::cout << sm.str() << '\n'; | std::cout << sm.str() << '\n'; | ||
Line 182: | Line 181: | ||
// C-style string demo | // C-style string demo | ||
std::cmatch cm; | std::cmatch cm; | ||
− | if(std::regex_search("this is a test", cm, std::regex("test"))) | + | if (std::regex_search("this is a test", cm, std::regex("test"))) |
− | std::cout << "\nFound " << cm[0] << " at position " << cm.prefix().length(); | + | std::cout << "\nFound " << cm[0] << " at position " |
+ | << cm.prefix().length() << '\n'; | ||
} | } | ||
− | + | |output= | |
Roses are #ff0000: true | Roses are #ff0000: true | ||
violets are #0000ff: true | violets are #0000ff: true | ||
Line 220: | Line 220: | ||
===See also=== | ===See also=== | ||
{{dsc begin}} | {{dsc begin}} | ||
− | {{dsc inc | cpp/regex/dsc basic_regex}} | + | {{dsc inc|cpp/regex/dsc basic_regex}} |
− | {{dsc inc | cpp/regex/dsc match_results}} | + | {{dsc inc|cpp/regex/dsc match_results}} |
− | {{dsc inc | cpp/regex/dsc regex_match}} | + | {{dsc inc|cpp/regex/dsc regex_match}} |
{{dsc end}} | {{dsc end}} | ||
{{langlinks|de|es|fr|it|ja|pt|ru|zh}} | {{langlinks|de|es|fr|it|ja|pt|ru|zh}} |
Revision as of 10:02, 16 August 2023
Defined in header <regex>
|
||
template< class BidirIt, class Alloc, class CharT, class Traits > |
(1) | (since C++11) |
template< class CharT, class Alloc, class Traits > bool regex_search( const CharT* str, |
(2) | (since C++11) |
template< class STraits, class SAlloc, class Alloc, class CharT, class Traits > |
(3) | (since C++11) |
template< class BidirIt, class CharT, class Traits > |
(4) | (since C++11) |
template< class CharT, class Traits > bool regex_search( const CharT* str, |
(5) | (since C++11) |
template< class STraits, class SAlloc, class CharT, class Traits > |
(6) | (since C++11) |
template< class STraits, class SAlloc, class Alloc, class CharT, class Traits > |
(7) | (since C++11) |
Determines if there is a match between the regular expression e and some subsequence in the target character sequence.
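1) Analyzes generic range [first, last). Match results are returned in m.
2) Analyzes a null-terminated string pointed to by str. Match results are returned in m.
3) Analyzes a string s. Match results are returned in m.
4-6) Equivalent to (1-3), just omits the match results.
7) Overload (3) is prohibited from accepting temporary strings; otherwise this function would populate match_results m with string iterators that become invalid immediately.
regex_search will successfully match any subsequence of the given sequence, whereas std::regex_match will only return true if the regular expression matches the entire sequence.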
Contents |
Parameters
first, last | - | a range identifying the target character sequence |
str | - | a pointer to a null-terminated target character sequence |
s | - | a string identifying the target character sequence |
e | - | the std::regex that should be applied to the target character sequence |
m | - | the match results |
flags | - | std::regex_constants::match_flag_type governing search behavior |
Type requirements | ||
-BidirIt must meet the requirements of LegacyBidirectionalIterator.
| ||
-Alloc must meet the requirements of Allocator.
|
Return value
Returns true if a match exists, false otherwise. In either case, the object m is updated, as follows:
If the match does not exist:
m.ready() == true | |
m.empty() == true | |
m.size() == 0 |
If the match exists:
m.ready() | true |
m.empty() | false |
m.size() | number of marked subexpressions plus 1, that is, 1 + e.mark_count() |
m.prefix().first | first |
m.prefix().second | m[0].first |
m.prefix().matched | m.prefix().first != m.prefix().second |
m.suffix().first | m[0].second |
m.suffix().second | last |
m.suffix().matched | m.suffix().first != m.suffix().second |
m[0].first | the start of the matching sequence |
m[0].second | the end of the matching sequence |
m[0].matched | true |
m[n].first | the start of the sequence that matched marked sub-expression n, or last if the sub-expression did not participate in the match |
m[n].second | the end of the sequence that matched marked sub-expression n, or last if the sub-expression did not participate in the match |
m[n].matched | true if sub-expression n participated in the match, false otherwise |
Notes
In order to examine all matches within the target sequence, std::regex_search
may be called in a loop, restarting each time from m[0].second of the previous call. std::regex_iterator offers an easy interface to this iteration.
Example
#include <cstddef>
#include <iostream>
#include <regex>
#include <string>

// Example program for std::regex_search. It demonstrates, in order:
//   1. the overload without match results (plain yes/no search),
//   2. the overload filling a std::smatch, printing prefix, every
//      sub-match and suffix,
//   3. repeated searching by restarting from the suffix of the last match,
//   4. the overload taking a null-terminated string with std::cmatch.
int main()
{
    std::string lines[] = {"Roses are #ff0000",
                           "violets are #0000ff",
                           "all of my base are belong to you"};

    // Three adjacent literals are concatenated into one pattern: a '#'
    // followed by three marked sub-expressions of two hex digits each.
    std::regex color_regex("#([a-f0-9]{2})"
                           "([a-f0-9]{2})"
                           "([a-f0-9]{2})");

    // simple match
    for (const auto& line : lines)
        std::cout << line << ": " << std::boolalpha
                  << std::regex_search(line, color_regex) << '\n';
    std::cout << '\n';

    // show contents of marked subexpressions within each match
    std::smatch color_match;
    for (const auto& line : lines)
        if (std::regex_search(line, color_match, color_regex))
        {
            std::cout << "matches for '" << line << "'\n";
            std::cout << "Prefix: '" << color_match.prefix() << "'\n";
            for (std::size_t i = 0; i < color_match.size(); ++i)
                std::cout << i << ": " << color_match[i] << '\n';
            std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n";
        }

    // repeated search (see also std::regex_iterator)
    // The tab between label and value is written as an explicit escape:
    // the pattern below requires a real tab character, and a literal tab
    // inside a raw string is easily turned into spaces by editors or
    // copy-paste, which would silently make the search find nothing.
    std::string log("Speed:\t366\n"
                    "Mass:\t35\n"
                    "Speed:\t378\n"
                    "Mass:\t32\n"
                    "Speed:\t400\n"
                    "Mass:\t30");
    std::regex r(R"(Speed:\t\d*)");
    std::smatch sm;
    while (std::regex_search(log, sm, r))
    {
        std::cout << sm.str() << '\n';
        // Continue after the current match. The suffix is converted to a
        // new std::string before the assignment, so overwriting `log`
        // (which `sm` refers into) is safe here; `sm` is repopulated by
        // the next call before it is read again.
        log = sm.suffix();
    }

    // C-style string demo
    std::cmatch cm;
    if (std::regex_search("this is a test", cm, std::regex("test")))
        std::cout << "\nFound " << cm[0] << " at position "
                  << cm.prefix().length() << '\n';
}
Output:
Roses are #ff0000: true violets are #0000ff: true all of my base are belong to you: false matches for 'Roses are #ff0000' Prefix: 'Roses are ' 0: #ff0000 1: ff 2: 00 3: 00 Suffix: '' matches for 'violets are #0000ff' Prefix: 'violets are ' 0: #0000ff 1: 00 2: 00 3: ff Suffix: '' Speed: 366 Speed: 378 Speed: 400 Found test at position 10
Defect reports
The following behavior-changing defect reports were applied retroactively to previously published C++ standards.
DR | Applied to | Behavior as published | Correct behavior |
---|---|---|---|
LWG 2329 | C++11 | basic_string rvalues were accepted, which was likely to result in dangling iterators | rejected via a deleted overload |
See also
(C++11) |
regular expression object (class template) |
(C++11) |
identifies one regular expression match, including all sub-expression matches (class template) |
(C++11) |
attempts to match a regular expression to an entire character sequence (function template) |