Namespaces
Variants
Views
Actions

Difference between revisions of "cpp/regex/regex search"

From cppreference.com
< cpp‎ | regex
m ([first,last) -> [first, last), headers sorted, fmt)
m (fmt, {{c}}, {{range}})
Line 2: Line 2:
 
{{cpp/regex/navbar}}
 
{{cpp/regex/navbar}}
 
{{dcl begin}}
 
{{dcl begin}}
{{dcl header | regex}}
+
{{dcl header|regex}}
{{dcl | num=1 | since=c++11 | 1=
+
{{dcl|num=1|since=c++11|1=
 
template< class BidirIt,  
 
template< class BidirIt,  
 
           class Alloc, class CharT, class Traits >
 
           class Alloc, class CharT, class Traits >
Line 12: Line 12:
 
                       std::regex_constants::match_default );
 
                       std::regex_constants::match_default );
 
}}
 
}}
{{dcl | num=2 | since=c++11 | 1=
+
{{dcl|num=2|since=c++11|1=
 
template< class CharT, class Alloc, class Traits >
 
template< class CharT, class Alloc, class Traits >
 
bool regex_search( const CharT* str,
 
bool regex_search( const CharT* str,
Line 20: Line 20:
 
                       std::regex_constants::match_default );
 
                       std::regex_constants::match_default );
 
}}
 
}}
{{dcl | num=3 | since=c++11 | 1=
+
{{dcl|num=3|since=c++11|1=
 
template< class STraits, class SAlloc,
 
template< class STraits, class SAlloc,
 
           class Alloc, class CharT, class Traits >
 
           class Alloc, class CharT, class Traits >
Line 32: Line 32:
 
                       std::regex_constants::match_default );
 
                       std::regex_constants::match_default );
 
}}
 
}}
{{dcl | num=4 | since=c++11 | 1=
+
{{dcl|num=4|since=c++11|1=
 
template< class BidirIt,
 
template< class BidirIt,
 
           class CharT, class Traits >
 
           class CharT, class Traits >
Line 40: Line 40:
 
                       std::regex_constants::match_default );
 
                       std::regex_constants::match_default );
 
}}
 
}}
{{dcl | num=5 | since=c++11 | 1=
+
{{dcl|num=5|since=c++11|1=
 
template< class CharT, class Traits >
 
template< class CharT, class Traits >
 
bool regex_search( const CharT* str,
 
bool regex_search( const CharT* str,
Line 47: Line 47:
 
                       std::regex_constants::match_default );
 
                       std::regex_constants::match_default );
 
}}
 
}}
{{dcl | num=6 | since=c++11 | 1=
+
{{dcl|num=6|since=c++11|1=
 
template< class STraits, class SAlloc,
 
template< class STraits, class SAlloc,
 
           class CharT, class Traits >
 
           class CharT, class Traits >
Line 55: Line 55:
 
                       std::regex_constants::match_default );
 
                       std::regex_constants::match_default );
 
}}
 
}}
{{dcl | num=7 | since=c++11 | 1=
+
{{dcl|num=7|since=c++11|1=
 
template< class STraits, class SAlloc,
 
template< class STraits, class SAlloc,
 
           class Alloc, class CharT, class Traits >
 
           class Alloc, class CharT, class Traits >
Line 69: Line 69:
 
{{dcl end}}
 
{{dcl end}}
  
Determines if there is a match between the regular expression {{tt|e}} and some subsequence in the target character sequence.
+
Determines if there is a match between the regular expression {{c|e}} and some subsequence in the target character sequence.
  
@1@ Analyzes generic range {{tt|[first, last)}}. Match results are returned in {{tt|m}}.
+
@1@ Analyzes generic range {{range|first|last}}. Match results are returned in {{c|m}}.
  
@2@ Analyzes a null-terminated string pointed to by {{tt|str}}. Match results are returned in {{tt|m}}.
+
@2@ Analyzes a null-terminated string pointed to by {{c|str}}. Match results are returned in {{c|m}}.
  
@3@ Analyzes a string {{tt|s}}. Match results are returned in {{tt|m}}.
+
@3@ Analyzes a string {{c|s}}. Match results are returned in {{c|m}}.
  
 
@4-6@ Equivalent to {{v|1-3}}, just omits the match results.
 
@4-6@ Equivalent to {{v|1-3}}, just omits the match results.
  
@7@ The overload {{v|3}} is prohibited from accepting temporary strings, otherwise this function populates {{tt|match_results}} {{tt|m}} with string iterators that become invalid immediately.
+
@7@ The overload {{v|3}} is prohibited from accepting temporary strings, otherwise this function populates {{tt|match_results}} {{c|m}} with string iterators that become invalid immediately.
  
 
{{tt|regex_search}} will successfully match any subsequence of the given sequence, whereas {{lc|std::regex_match}} will only return {{c|true}} if the regular expression matches the ''entire'' sequence.
 
{{tt|regex_search}} will successfully match any subsequence of the given sequence, whereas {{lc|std::regex_match}} will only return {{c|true}} if the regular expression matches the ''entire'' sequence.
Line 85: Line 85:
 
===Parameters===
 
===Parameters===
 
{{par begin}}
 
{{par begin}}
{{par | first, last | a range identifying the target character sequence}}
+
{{par|first, last|a range identifying the target character sequence}}
{{par | str | a pointer to a null-terminated target character sequence}}
+
{{par|str|a pointer to a null-terminated target character sequence}}
{{par | s | a string identifying target character sequence}}
+
{{par|s|a string identifying target character sequence}}
{{par | e | the {{lc|std::regex}} that should be applied to the target character sequence}}
+
{{par|e|the {{lc|std::regex}} that should be applied to the target character sequence}}
{{par | m | the match results}}
+
{{par|m|the match results}}
{{par | flags | {{lc|std::regex_constants::match_flag_type}} governing search behavior}}
+
{{par|flags|{{lc|std::regex_constants::match_flag_type}} governing search behavior}}
 
{{par hreq}}
 
{{par hreq}}
{{par req named | BidirIt | BidirectionalIterator}}
+
{{par req named|BidirIt|BidirectionalIterator}}
{{par req named | Alloc | Allocator}}
+
{{par req named|Alloc|Allocator}}
 
{{par end}}
 
{{par end}}
  
 
===Return value===
 
===Return value===
Returns {{c|true}} if a match exists, {{c|false}} otherwise. In either case, the object {{tt|m}} is updated, as follows:
+
Returns {{c|true}} if a match exists, {{c|false}} otherwise. In either case, the object {{c|m}} is updated, as follows:
  
 
If the match does not exist:
 
If the match does not exist:
 
{{dsc begin}}
 
{{dsc begin}}
{{dsc | {{c|1=m.ready() == true}} }}
+
{{dsc|{{c|1=m.ready() == true}}}}
{{dsc | {{c|1=m.empty() == true}} }}
+
{{dsc|{{c|1=m.empty() == true}}}}
{{dsc | {{c|1=m.size() == 0}} }}
+
{{dsc|{{c|1=m.size() == 0}}}}
 
{{dsc end}}
 
{{dsc end}}
  
 
If the match exists:
 
If the match exists:
 
{{dsc begin}}
 
{{dsc begin}}
{{dsc | {{c|m.ready()}} | {{c|true}} }}
+
{{dsc|{{c|m.ready()}}|{{c|true}}}}
{{dsc | {{c|m.empty()}} | {{c|false}} }}
+
{{dsc|{{c|m.empty()}}|{{c|false}}}}
{{dsc | {{c|m.size()}} | number of [[cpp/regex/ecmascript#Sub-expressions|marked subexpressions]] plus 1, that is, {{c|1 + e.mark_count()}} }}
+
{{dsc|{{c|m.size()}}|number of [[cpp/regex/ecmascript#Sub-expressions|marked subexpressions]] plus 1, that is, {{c|1 + e.mark_count()}}}}
{{dsc | {{c|m.prefix().first}} | {{tt|first}} }}
+
{{dsc|{{c|m.prefix().first}}|{{c|first}}}}
{{dsc | {{c|m.prefix().second}} | {{c|m[0].first}} }}
+
{{dsc|{{c|m.prefix().second}}|{{c|m[0].first}}}}
{{dsc | {{c|m.prefix().matched}} | {{c|m.prefix().first !{{=}} m.prefix().second}} }}
+
{{dsc|{{c|m.prefix().matched}}|{{c|m.prefix().first !{{=}} m.prefix().second}}}}
{{dsc | {{c|m.suffix().first}} | {{c|m[0].second}}}}
+
{{dsc|{{c|m.suffix().first}}|{{c|m[0].second}}}}
{{dsc | {{c|m.suffix().second}} | {{tt|last}}}}
+
{{dsc|{{c|m.suffix().second}}|{{c|last}}}}
{{dsc | {{c|m.suffix().matched}} | {{c|m.suffix().first !{{=}} m.suffix().second}} }}
+
{{dsc|{{c|m.suffix().matched}}|{{c|m.suffix().first !{{=}} m.suffix().second}}}}
{{dsc | {{c|m[0].first}} | the start of the matching sequence }}
+
{{dsc|{{c|m[0].first}}|the start of the matching sequence}}
{{dsc | {{c|m[0].second}} | the end of the matching sequence }}
+
{{dsc|{{c|m[0].second}}|the end of the matching sequence}}
{{dsc | {{c|m[0].matched}} | {{c|true}}}}
+
{{dsc|{{c|m[0].matched}}|{{c|true}}}}
{{dsc | {{c|1=m[n].first}} | the start of the sequence that matched [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] n, or {{tt|last}} if the subexpression did not participate in the match}}
+
{{dsc|{{c|1=m[n].first}}|the start of the sequence that matched [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] n, or {{c|last}} if the subexpression did not participate in the match}}
{{dsc | {{c|1=m[n].second}} | the end of the sequence that matched [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] n, or {{tt|last}} if the subexpression did not participate in the match}}
+
{{dsc|{{c|1=m[n].second}}|the end of the sequence that matched [[cpp/regex/ecmascript#Sub-expressions|marked sub-expression]] n, or {{c|last}} if the subexpression did not participate in the match}}
{{dsc | {{c|1=m[n].matched}} | {{c|true}} if sub-expression n participated in the match, {{c|false}} otherwise}}
+
{{dsc|{{c|1=m[n].matched}}|{{c|true}} if sub-expression n participated in the match, {{c|false}} otherwise}}
 
{{dsc end}}
 
{{dsc end}}
  
 
===Notes===
 
===Notes===
In order to examine all matches within the target sequence, {{tt|std::regex_search}} may be called in a loop, restarting each time from {{tt|m[0].second}} of the previous call. {{lc|std::regex_iterator}} offers an easy interface to this iteration.
+
In order to examine all matches within the target sequence, {{tt|std::regex_search}} may be called in a loop, restarting each time from {{c|m[0].second}} of the previous call. {{lc|std::regex_iterator}} offers an easy interface to this iteration.
  
 
===Example===
 
===Example===
 
{{example
 
{{example
| code=
+
|code=
 
#include <iostream>
 
#include <iostream>
 
#include <regex>
 
#include <regex>
Line 146: Line 146:
  
 
     // simple match
 
     // simple match
     for (const auto &line : lines) {
+
     for (const auto &line : lines)
 
         std::cout << line << ": " << std::boolalpha
 
         std::cout << line << ": " << std::boolalpha
 
                   << std::regex_search(line, color_regex) << '\n';
 
                   << std::regex_search(line, color_regex) << '\n';
    } 
 
 
     std::cout << '\n';
 
     std::cout << '\n';
 
   
 
   
 
     // show contents of marked subexpressions within each match
 
     // show contents of marked subexpressions within each match
 
     std::smatch color_match;
 
     std::smatch color_match;
     for (const auto& line : lines) {
+
     for (const auto& line : lines)
         if(std::regex_search(line, color_match, color_regex)) {
+
         if (std::regex_search(line, color_match, color_regex))
 +
        {
 
             std::cout << "matches for '" << line << "'\n";
 
             std::cout << "matches for '" << line << "'\n";
 
             std::cout << "Prefix: '" << color_match.prefix() << "'\n";
 
             std::cout << "Prefix: '" << color_match.prefix() << "'\n";
Line 162: Line 162:
 
             std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n";
 
             std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n";
 
         }
 
         }
    }
 
  
 
     // repeated search (see also std::regex_iterator)
 
     // repeated search (see also std::regex_iterator)
Line 174: Line 173:
 
     std::regex r(R"(Speed:\t\d*)");
 
     std::regex r(R"(Speed:\t\d*)");
 
     std::smatch sm;
 
     std::smatch sm;
     while(regex_search(log, sm, r))
+
     while (regex_search(log, sm, r))
 
     {
 
     {
 
         std::cout << sm.str() << '\n';
 
         std::cout << sm.str() << '\n';
Line 182: Line 181:
 
     // C-style string demo
 
     // C-style string demo
 
     std::cmatch cm;
 
     std::cmatch cm;
     if(std::regex_search("this is a test", cm, std::regex("test")))  
+
     if (std::regex_search("this is a test", cm, std::regex("test")))  
         std::cout << "\nFound " << cm[0] << " at position " << cm.prefix().length();
+
         std::cout << "\nFound " << cm[0] << " at position "
 +
                  << cm.prefix().length() << '\n';
 
}
 
}
| output=
+
|output=
 
Roses are #ff0000: true
 
Roses are #ff0000: true
 
violets are #0000ff: true
 
violets are #0000ff: true
Line 220: Line 220:
 
===See also===
 
===See also===
 
{{dsc begin}}
 
{{dsc begin}}
{{dsc inc | cpp/regex/dsc basic_regex}}
+
{{dsc inc|cpp/regex/dsc basic_regex}}
{{dsc inc | cpp/regex/dsc match_results}}
+
{{dsc inc|cpp/regex/dsc match_results}}
{{dsc inc | cpp/regex/dsc regex_match}}
+
{{dsc inc|cpp/regex/dsc regex_match}}
 
{{dsc end}}
 
{{dsc end}}
  
 
{{langlinks|de|es|fr|it|ja|pt|ru|zh}}
 
{{langlinks|de|es|fr|it|ja|pt|ru|zh}}

Revision as of 10:02, 16 August 2023

Defined in header <regex>
template< class BidirIt,

          class Alloc, class CharT, class Traits >
bool regex_search( BidirIt first, BidirIt last,
                   std::match_results<BidirIt,Alloc>& m,
                   const std::basic_regex<CharT,Traits>& e,
                   std::regex_constants::match_flag_type flags =

                       std::regex_constants::match_default );
(1) (since C++11)
template< class CharT, class Alloc, class Traits >

bool regex_search( const CharT* str,
                   std::match_results<const CharT*,Alloc>& m,
                   const std::basic_regex<CharT,Traits>& e,
                   std::regex_constants::match_flag_type flags =

                       std::regex_constants::match_default );
(2) (since C++11)
template< class STraits, class SAlloc,

          class Alloc, class CharT, class Traits >
bool regex_search( const std::basic_string<CharT,STraits,SAlloc>& s,
                   std::match_results<
                       typename std::basic_string<CharT,STraits,SAlloc>::const_iterator,
                       Alloc
                   >& m,
                   const std::basic_regex<CharT, Traits>& e,
                   std::regex_constants::match_flag_type flags =

                       std::regex_constants::match_default );
(3) (since C++11)
template< class BidirIt,

          class CharT, class Traits >
bool regex_search( BidirIt first, BidirIt last,
                   const std::basic_regex<CharT,Traits>& e,
                   std::regex_constants::match_flag_type flags =

                       std::regex_constants::match_default );
(4) (since C++11)
template< class CharT, class Traits >

bool regex_search( const CharT* str,
                   const std::basic_regex<CharT,Traits>& e,
                   std::regex_constants::match_flag_type flags =

                       std::regex_constants::match_default );
(5) (since C++11)
template< class STraits, class SAlloc,

          class CharT, class Traits >
bool regex_search( const std::basic_string<CharT,STraits,SAlloc>& s,
                   const std::basic_regex<CharT,Traits>& e,
                   std::regex_constants::match_flag_type flags =

                       std::regex_constants::match_default );
(6) (since C++11)
template< class STraits, class SAlloc,

          class Alloc, class CharT, class Traits >
bool regex_search( const std::basic_string<CharT,STraits,SAlloc>&&,
                   std::match_results<
                       typename std::basic_string<CharT,STraits,SAlloc>::const_iterator,
                       Alloc
                   >&,
                   const std::basic_regex<CharT, Traits>&,
                   std::regex_constants::match_flag_type flags =

                       std::regex_constants::match_default ) = delete;
(7) (since C++11)

Determines if there is a match between the regular expression e and some subsequence in the target character sequence.

1) Analyzes generic range [first, last). Match results are returned in m.
2) Analyzes a null-terminated string pointed to by str. Match results are returned in m.
3) Analyzes a string s. Match results are returned in m.
4-6) Equivalent to (1-3), just omits the match results.
7) The overload (3) is prohibited from accepting temporary strings; otherwise this function would populate match_results m with string iterators that become invalid immediately.

regex_search will successfully match any subsequence of the given sequence, whereas std::regex_match will only return true if the regular expression matches the entire sequence.

Contents

Parameters

first, last - a range identifying the target character sequence
str - a pointer to a null-terminated target character sequence
s - a string identifying the target character sequence
e - the std::regex that should be applied to the target character sequence
m - the match results
flags - std::regex_constants::match_flag_type governing search behavior
Type requirements
-
BidirIt must meet the requirements of LegacyBidirectionalIterator.
-
Alloc must meet the requirements of Allocator.

Return value

Returns true if a match exists, false otherwise. In either case, the object m is updated, as follows:

If the match does not exist:

m.ready() == true
m.empty() == true
m.size() == 0

If the match exists:

m.ready() true
m.empty() false
m.size() number of marked subexpressions plus 1, that is, 1 + e.mark_count()
m.prefix().first first
m.prefix().second m[0].first
m.prefix().matched m.prefix().first != m.prefix().second
m.suffix().first m[0].second
m.suffix().second last
m.suffix().matched m.suffix().first != m.suffix().second
m[0].first the start of the matching sequence
m[0].second the end of the matching sequence
m[0].matched true
m[n].first the start of the sequence that matched marked sub-expression n, or last if the subexpression did not participate in the match
m[n].second the end of the sequence that matched marked sub-expression n, or last if the subexpression did not participate in the match
m[n].matched true if sub-expression n participated in the match, false otherwise

Notes

In order to examine all matches within the target sequence, std::regex_search may be called in a loop, restarting each time from m[0].second of the previous call. std::regex_iterator offers an easy interface to this iteration.

Example

#include <iostream>
#include <regex>
#include <string>
 
int main()
{
    // Sample input: the first two entries contain a '#' followed by six
    // lowercase hex digits, the third contains no such token.
    const std::string samples[] = {"Roses are #ff0000",
                                   "violets are #0000ff",
                                   "all of my base are belong to you"};

    // '#' followed by three capture groups of two lowercase hex digits each.
    const std::regex hex_color("#([a-f0-9]{2})"
                               "([a-f0-9]{2})"
                               "([a-f0-9]{2})");

    // Overload without match results: only reports whether a match exists.
    for (const std::string& text : samples)
    {
        const bool has_color = std::regex_search(text, hex_color);
        std::cout << text << ": " << std::boolalpha << has_color << '\n';
    }
    std::cout << '\n';

    // Overload with match results: print prefix, every sub-match (index 0 is
    // the whole match) and suffix for each sample that matches.
    std::smatch groups;
    for (const std::string& text : samples)
    {
        if (!std::regex_search(text, groups, hex_color))
            continue;

        std::cout << "matches for '" << text << "'\n";
        std::cout << "Prefix: '" << groups.prefix() << "'\n";

        // Iterating the match_results visits the same size() sub-matches
        // that indexing from 0 would.
        std::smatch::size_type index = 0;
        for (const auto& sub : groups)
            std::cout << index++ << ": " << sub << '\n';

        std::cout << "Suffix: '" << groups.suffix() << "\'\n\n";
    }

    // Repeated search (see also std::regex_iterator): after each hit the
    // searched text is replaced by whatever followed that hit.
    std::string telemetry(R"(
        Speed:	366
        Mass:	35
        Speed:	378
        Mass:	32
        Speed:	400
	Mass:	30)");
    const std::regex speed_entry(R"(Speed:\t\d*)");
    for (std::smatch hit; std::regex_search(telemetry, hit, speed_entry);
         telemetry = hit.suffix())
    {
        std::cout << hit.str() << '\n';
    }

    // C-style string demo: std::cmatch holds const char* iterators into the
    // string literal.
    const char* const haystack = "this is a test";
    const std::regex needle("test");
    std::cmatch found;
    if (std::regex_search(haystack, found, needle))
    {
        std::cout << "\nFound " << found[0] << " at position "
                  << found.prefix().length() << '\n';
    }
}

Output:

Roses are #ff0000: true
violets are #0000ff: true
all of my base are belong to you: false
 
matches for 'Roses are #ff0000'
Prefix: 'Roses are '
0: #ff0000
1: ff
2: 00
3: 00
Suffix: ''
 
matches for 'violets are #0000ff'
Prefix: 'violets are '
0: #0000ff
1: 00
2: 00
3: ff
Suffix: ''
 
Speed:	366
Speed:	378
Speed:	400
 
Found test at position 10

Defect reports

The following behavior-changing defect reports were applied retroactively to previously published C++ standards.

DR | Applied to | Behavior as published | Correct behavior
LWG 2329 | C++11 | basic_string rvalues were accepted, which was likely to result in dangling iterators | rejected via a deleted overload

See also

basic_regex (C++11)
regular expression object
(class template) [edit]
match_results (C++11)
identifies one regular expression match, including all sub-expression matches
(class template) [edit]
regex_match (C++11)
attempts to match a regular expression to an entire character sequence
(function template) [edit]