[关闭]
@zhengyuhong 2015-06-08T10:55:21.000000Z 字数 7803 阅读 1595

regex

C++11 STL Boost


regex

  由于反斜杠在C++字符串字面量和regex中都是特殊字符,因此在普通字符串字面量中需要转义两次(例如用"\\s"表示正则表达式\s),才能正确执行。而在python中可以直接使用patn = r'\s',r表示禁止转义,所以patn的正则表达式跟作者的意图一致。C++的普通字符串字面量没有这种禁止转义的写法,所以需要将反斜杠转义才能正确表达出作者的正则表达式意图(C++11起也可以使用原始字符串字面量,例如R"(\s)",从而避免二次转义)。

regex

  1. typedef basic_regex<char> regex;

wregex

  1. typedef basic_regex<wchar_t> wregex;

basic_regex

  1. template <class charT, class traits = regex_traits<charT> > class basic_regex;

basic_regex::basic_regex

  1. basic_regex();
  2. basic_regex (const basic_regex& rgx);
  3. basic_regex (basic_regex&& rgx) noexcept;
  4. explicit basic_regex (const charT* str, flag_type flags = ECMAScript );
  5. basic_regex (const charT* str, size_t len, flag_type flags = ECMAScript );
  6. template <class ST, class SA>
  7. explicit basic_regex (const basic_string<charT,ST,SA>& str, flag_type flags = ECMAScript );
  8. template <class ForwardIterator>
  9. basic_regex (ForwardIterator first, ForwardIterator last, flag_type flags = ECMAScript );
  10. basic_regex (initializer_list<charT> il, flag_type flags = ECMAScript );

example

  1. // basic_regex constructors
  2. // note: using regex, a standard alias of basic_regex<char>
  3. #include <iostream>
  4. #include <string>
  5. #include <regex>
  6. int main ()
  7. {
  8. std::string pattern = "^.*$";
  9. std::regex first;
  10. // default
  11. std::regex second = first;
  12. // copy
  13. std::regex third (pattern);
  14. // string object initialization
  15. std::regex fourth ("<[^>]>");
  16. // string literal initialization
  17. std::regex fifth (pattern.begin(),pattern.end());
  18. // range initialization
  19. std::regex sixth {'.','+'};
  20. // initializer_list initialization
  21. std::regex seventh ("[0-9A-Z]+", std::regex::ECMAScript);
  22. // with syntax option
  23. using namespace std::regex_constants;
  24. // introducing constants namespace
  25. std::regex eighth ("[0-9A-Z]+", ECMAScript);
  26. // same as seventh
  27. std::regex ninth ("\\bd\\w+", ECMAScript | icase );
  28. // multiple flags
  29. std::string subject = "Duddy the duck";
  30. std::string replacement = "yup";
  31. std::cout << std::regex_replace (subject, ninth, replacement);
  32. std::cout << std::endl;
  33. return 0;
  34. }

  一般来说,正则表达式最难的地方就是写出正确的正则表达式,所以regex的构造函数是最为重要的。std::basic_regex还有其他公共成员函数,仅仅列出来,不一一介绍。详述查看reference

regex_match

  1. template <class charT, class traits>
  2. bool regex_match (
  3. const charT* s,
  4. const basic_regex<charT,traits>& rgx,
  5. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. template <class ST, class SA, class charT, class traits>
  2. bool regex_match (
  3. const basic_string<charT,ST,SA>& s,
  4. const basic_regex<charT,traits>& rgx,
  5. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. template <class charT, class Alloc, class traits>
  2. bool regex_match (
  3. const charT* s,
  4. match_results<const charT*, Alloc>& m,
  5. const basic_regex<charT,traits>& rgx,
  6. regex_constants::match_flag_type flags = regex_constants::match_default);

  详述查看reference
  

  1. // regex_match example
  2. #include <iostream>
  3. #include <string>
  4. #include <regex>
  5. int main ()
  6. {
  7. if (std::regex_match ("subject", std::regex("(sub)(.*)") ))
  8. std::cout << "string literal matched\n";
  9. const char cstr[] = "subject";
  10. std::string s("subject");
  11. std::regex e("(sub)(.*)");
  12. if (std::regex_match (s,e))
  13. std::cout << "string object matched\n";
  14. if ( std::regex_match ( s.begin(), s.end(), e ) )
  15. std::cout << "range matched\n";
  16. std::cmatch cm;
  17. // same as std::match_results<const char*> cm;
  18. std::regex_match (cstr,cm,e);
  19. std::cout << "string literal with " << cm.size() << " matches\n";
  20. // using explicit flags:
  21. std::regex_match ( cstr, cm, e, std::regex_constants::match_default );
  22. std::smatch sm;
  23. // same as std::match_results<string::const_iterator> sm;
  24. std::regex_match (s,sm,e);
  25. std::cout << "string object with " << sm.size() << " matches\n";
  26. std::regex_match ( s.cbegin(), s.cend(), sm, e);
  27. std::cout << "range with " << sm.size() << " matches\n";
  28. std::cout << "the matches were: ";
  29. for (unsigned i=0; i<sm.size(); ++i) {
  30. std::cout << "[" << sm[i] << "] ";
  31. }
  32. std::cout << std::endl;
  33. return 0;
  34. }

regex_search

  1. template <class charT, class traits>
  2. bool regex_search (
  3. const charT* s,
  4. const basic_regex<charT,traits>& rgx,
  5. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. template <class ST, class SA, class charT, class traits>
  2. bool regex_search (
  3. const basic_string<charT,ST,SA>& s,
  4. const basic_regex<charT,traits>& rgx,
  5. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. template <class charT, class Alloc, class traits>
  2. bool regex_search (
  3. const charT* s,
  4. match_results<const charT*, Alloc>& m,
  5. const basic_regex<charT,traits>& rgx,
  6. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. template <class ST, class SA, class Alloc, class charT, class traits>
  2. bool regex_search (
  3. const basic_string<charT,ST,SA>& s,
  4. match_results<typename basic_string<charT,ST,SA>::const_iterator,Alloc>& m,
  5. const basic_regex<charT,traits>& rgx,
  6. regex_constants::match_flag_type flags = regex_constants::match_default);

example

  1. // regex_search example
  2. #include <iostream>
  3. #include <string>
  4. #include <regex>
  5. int main ()
  6. {
  7. std::string s ("this subject has a submarine as a subsequence");
  8. std::smatch m;
  9. std::regex e ("\\b(sub)([^ ]*)"); // matches words beginning by "sub"
  10. std::cout << "Target sequence: " << s << std::endl;
  11. std::cout << "Regular expression: /\\b(sub)([^ ]*)/" << std::endl;
  12. std::cout << "The following matches and submatches were found:" << std::endl;
  13. while (std::regex_search (s,m,e)) {
  14. for (auto x:m) {
  15. std::cout << x << " ";
  16. }
  17. std::cout << std::endl;
  18. s = m.suffix().str();
  19. }
  20. return 0;
  21. }

regex_replace

  1. template <class traits, class charT>
  2. basic_string<charT> regex_replace (
  3. const charT* s,
  4. const basic_regex<charT,traits>& rgx,
  5. const charT* fmt,
  6. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. template <class traits, class charT, class ST, class SA>
  2. basic_string<charT> regex_replace (
  3. const charT*s,
  4. const basic_regex<charT,traits>& rgx,
  5. const basic_string<charT,ST,SA>& fmt,
  6. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. template <class traits, class charT, class ST, class SA>
  2. basic_string<charT,ST,SA> regex_replace (
  3. const basic_string<charT,ST,SA>& s,
  4. const basic_regex<charT,traits>& rgx,
  5. const charT* fmt,
  6. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. template <class traits, class charT, class ST, class SA, class FST, class FSA>
  2. basic_string<charT,ST,SA> regex_replace (
  3. const basic_string<charT,ST,SA>& s,
  4. const basic_regex<charT,traits>& rgx,
  5. const basic_string<charT,FST,FSA>& fmt,
  6. regex_constants::match_flag_type flags = regex_constants::match_default);

example

  1. // regex_replace example
  2. #include <iostream>
  3. #include <string>
  4. #include <regex>
  5. #include <iterator>
  6. int main ()
  7. {
  8. std::string s ("there is a subsequence in the string\n");
  9. std::regex e ("\\b(sub)([^ ]*)"); // matches words beginning by "sub"
  10. // using string/c-string (3) version:
  11. std::cout << std::regex_replace (s,e,"sub-$2");
  12. // using range/c-string (6) version:
  13. std::string result;
  14. std::regex_replace (std::back_inserter(result), s.begin(), s.end(), e, "$2");
  15. std::cout << result;
  16. // with flags:
  17. std::cout << std::regex_replace (s,e,"$1 and $2",std::regex_constants::format_no_copy);
  18. std::cout << std::endl;
  19. return 0;
  20. }

  $n 表示对第n个捕获分组的引用。正则表达式为 \b(sub)([^ ]*)(在C++源码中写作"\\b(sub)([^ ]*)"),其中有两个捕获分组:$1表示第一个分组(sub)匹配到的字符串,$2表示第二个分组([^ ]*)匹配到的字符串。std::regex_replace (s,e,"sub-$2");表示将整个匹配到的子串替换为"sub-"加上$2的内容,例如"subsequence"会被替换为"sub-sequence"。

regex_iterator

  1. regex_iterator();
  2. //The default constructor (1) constructs an end-of-sequence iterator. This value shall not be dereferenced.
  3. regex_iterator (const regex_iterator& rit);
  4. regex_iterator (
  5. BidirectionalIterator first, BidirectionalIterator last,
  6. const regex_type& rgx,
  7. regex_constants::match_flag_type flags = regex_constants::match_default);
  1. #include <iostream>
  2. #include <string>
  3. #include <regex>
  4. int main ()
  5. {
  6. std::string s ("this subject has a submarine as a subsequence");
  7. std::regex e ("\\b(sub)([^ ]*)"); // matches words beginning by "sub"
  8. std::regex_iterator<std::string::iterator> rit ( s.begin(), s.end(), e );
  9. std::regex_iterator<std::string::iterator> rend;
  10. while (rit!=rend) {
  11. std::cout << rit->str() << std::endl;
  12. ++rit;
  13. }
  14. return 0;
  15. }

sub_match

  1. template <class BidirectionalIterator>
  2. class sub_match : public pair <BidirectionalIterator, BidirectionalIterator>;
  3. typedef sub_match<const char*> csub_match;
  4. //sub_match for string literals
  5. typedef sub_match<const wchar_t*> wcsub_match;
  6. //sub_match for wide string literals
  7. typedef sub_match<string::const_iterator> ssub_match;
  8. //sub_match for strings
  9. typedef sub_match<wstring::const_iterator> wssub_match;
  10. //sub_match for wide strings

  Stores each of the individual matches of a match_results object filled by one of the regex algorithms regex_match or regex_search, or by the regex iterators

  1. #include <iostream>
  2. #include <regex>
  3. int main ()
  4. {
  5. std::cmatch m; // default constructor
  6. std::regex_match ( "subject", m, std::regex("sub(.*)") );
  7. for (std::csub_match sub_m : m)
  8. std::cout << "match " << ": " << sub_m << std::endl;
  9. return 0;
  10. }
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注