1[/ 2 Copyright 2006-2007 John Maddock. 3 Distributed under the Boost Software License, Version 1.0. 4 (See accompanying file LICENSE_1_0.txt or copy at 5 http://www.boost.org/LICENSE_1_0.txt). 6] 7 8 9[section:regex_replace regex_replace] 10 11 #include <boost/regex.hpp> 12 13The algorithm [regex_replace] searches through a string finding all the 14matches to the regular expression: for each match it then calls 15[match_results_format] to format the string and sends the result to the 16output iterator. Sections of text that do not match are copied to the 17output unchanged only if the /flags/ parameter does not have the 18flag `format_no_copy` set. If the flag `format_first_only` is set then 19only the first occurrence is replaced rather than all occurrences. 20 21 template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter> 22 OutputIterator regex_replace(OutputIterator out, 23 BidirectionalIterator first, 24 BidirectionalIterator last, 25 const basic_regex<charT, traits>& e, 26 Formatter fmt, 27 match_flag_type flags = match_default); 28 29 template <class traits, class charT, class Formatter> 30 basic_string<charT> regex_replace(const basic_string<charT>& s, 31 const basic_regex<charT, traits>& e, 32 Formatter fmt, 33 match_flag_type flags = match_default); 34 35 36[h4 Description] 37 38 template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter> 39 OutputIterator regex_replace(OutputIterator out, 40 BidirectionalIterator first, 41 BidirectionalIterator last, 42 const basic_regex<charT, traits>& e, 43 Formatter fmt, 44 match_flag_type flags = match_default); 45 46Enumerates all the occurrences of expression /e/ in the sequence \[first, last), 47replacing each occurrence with the string that results from merging the 48match found with the format string /fmt/, and copies the resulting string to /out/. 
49If /fmt/ is a unary, binary or ternary function object, then the 50character sequence generated by that object is copied unchanged to the output when performing 51a substitution. 52 53If the flag `format_no_copy` is set in /flags/ then unmatched sections of 54text are not copied to output. 55 56If the flag `format_first_only` is set in /flags/ then only the first 57occurrence of /e/ is replaced. 58 59The manner in which the format string /fmt/ is interpreted, along with the 60rules used for finding matches, are determined by the flags set in /flags/: 61see [match_flag_type]. 62 63[*Requires]: 64The type `Formatter` must be either a pointer to a null-terminated string 65of type `char_type[]`, or be a container of `char_type`'s (for example 66`std::basic_string<char_type>`) or be a unary, binary or ternary functor 67that computes the replacement string from a function call: either 68`fmt(what)`, which must return a container of `char_type`'s to be used as the 69replacement text, or one of `fmt(what, out)` or `fmt(what, out, flags)`, both of 70which write the replacement text to `*out`, and then return the new 71OutputIterator position. In each case `what` is the [match_results] object 72that represents the match found. Note that if the formatter is a functor, then it is 73['passed by value]: users who want to pass function objects with internal state 74might want to use [@../../../../doc/html/ref.html Boost.Ref] to wrap the object so 75that it's passed by reference. 76 77[*Effects]: Constructs a [regex_iterator] object: 78 79 regex_iterator<BidirectionalIterator, charT, traits> 80 i(first, last, e, flags), 81 82and uses /i/ to enumerate through all of the matches /m/ of type 83[match_results] `<BidirectionalIterator>` that occur within the sequence 84\[first, last). 85 86If no such matches are found and 87 88 !(flags & format_no_copy) 89 90then calls 91 92 std::copy(first, last, out). 
93 94Otherwise, for each match found, if 95 96 !(flags & format_no_copy) 97 98calls 99 100 std::copy(m.prefix().first, m.prefix().second, out), 101 102and then calls 103 104 m.format(out, fmt, flags). 105 106Finally, if 107 108 !(flags & format_no_copy) 109 110calls 111 112 std::copy(last_m.suffix().first, last_m.suffix().second, out) 113 114where /last_m/ is a copy of the last match found. 115 116If `flags & format_first_only` is non-zero then only the first match found 117is replaced. 118 119[*Throws]: `std::runtime_error` if the complexity of matching the expression 120against an N character string begins to exceed O(N[super 2]), or if the 121program runs out of stack space while matching the expression (if Boost.Regex is 122configured in recursive mode), or if the matcher exhausts its permitted 123memory allocation (if Boost.Regex is configured in non-recursive mode). 124 125[*Returns]: /out/. 126 127 template <class traits, class charT, class Formatter> 128 basic_string<charT> regex_replace(const basic_string<charT>& s, 129 const basic_regex<charT, traits>& e, 130 Formatter fmt, 131 match_flag_type flags = match_default); 132 133[*Requires]: 134The type `Formatter` must be either a pointer to a null-terminated string 135of type `char_type[]`, or be a container of `char_type`'s (for example 136`std::basic_string<char_type>`) or be a unary, binary or ternary functor 137that computes the replacement string from a function call: either 138`fmt(what)`, which must return a container of `char_type`'s to be used as the 139replacement text, or one of `fmt(what, out)` or `fmt(what, out, flags)`, both of 140which write the replacement text to `*out`, and then return the new 141OutputIterator position. In each case `what` is the [match_results] object 142that represents the match found. 143 144[*Effects]: Constructs an object `basic_string<charT> result`, calls 145`regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, flags)`, 146and then returns `result`. 
147 148[h4 Examples] 149 150The following example takes C/C++ source code as input, and outputs 151syntax-highlighted HTML code. 152 153 #include <fstream> 154 #include <sstream> 155 #include <string> 156 #include <iterator> 157 #include <boost/regex.hpp> 158 #include <fstream> 159 #include <iostream> 160 161 // purpose: 162 // takes the contents of a file and transforms it into 163 // syntax-highlighted code in html format 164 165 boost::regex e1, e2; 166 extern const char* expression_text; 167 extern const char* format_string; 168 extern const char* pre_expression; 169 extern const char* pre_format; 170 extern const char* header_text; 171 extern const char* footer_text; 172 173 void load_file(std::string& s, std::istream& is) 174 { 175 s.erase(); 176 s.reserve(is.rdbuf()->in_avail()); 177 char c; 178 while(is.get(c)) 179 { 180 if(s.capacity() == s.size()) 181 s.reserve(s.capacity() * 3); 182 s.append(1, c); 183 } 184 } 185 186 int main(int argc, const char** argv) 187 { 188 try{ 189 e1.assign(expression_text); 190 e2.assign(pre_expression); 191 for(int i = 1; i < argc; ++i) 192 { 193 std::cout << "Processing file " << argv[i] << std::endl; 194 std::ifstream fs(argv[i]); 195 std::string in; 196 load_file(in, fs); 197 std::string out_name(std::string(argv[i]) + std::string(".htm")); 198 std::ofstream os(out_name.c_str()); 199 os << header_text; 200 // strip '<' and '>' first by outputting to a 201 // temporary string stream 202 std::ostringstream t(std::ios::out | std::ios::binary); 203 std::ostream_iterator<char, char> oi(t); 204 boost::regex_replace(oi, in.begin(), in.end(), 205 e2, pre_format, boost::match_default | boost::format_all); 206 // then output to final output stream 207 // adding syntax highlighting: 208 std::string s(t.str()); 209 std::ostream_iterator<char, char> out(os); 210 boost::regex_replace(out, s.begin(), s.end(), 211 e1, format_string, boost::match_default | boost::format_all); 212 os << footer_text; 213 } 214 } 215 catch(...) 
216 { return -1; } 217 return 0; 218 } 219 220 extern const char* pre_expression = "(<)|(>)|(&)|\\r"; 221 extern const char* pre_format = "(?1&lt;)(?2&gt;)(?3&amp;)"; 222 223 224 const char* expression_text = 225 // preprocessor directives: index 1 226 "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|" 227 // comment: index 2 228 "(//[^\\n]*|/\\*.*?\\*/)|" 229 // literals: index 3 230 "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+" 231 "(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|" 232 // string literals: index 4 233 "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|" 234 // keywords: index 5 235 "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import" 236 "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall" 237 "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool" 238 "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete" 239 "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto" 240 "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected" 241 "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast" 242 "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned" 243 "|using|virtual|void|volatile|wchar_t|while)\\>" 244 ; 245 246 const char* format_string = "(?1<font color=\"#008040\">$&</font>)" 247 "(?2<I><font color=\"#000080\">$&</font></I>)" 248 "(?3<font color=\"#0000A0\">$&</font>)" 249 "(?4<font color=\"#0000FF\">$&</font>)" 250 "(?5<B>$&</B>)"; 251 252 const char* header_text = 253 "<HTML>\n<HEAD>\n" 254 "<TITLE>Auto-generated html formatted source</TITLE>\n" 255 "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n" 256 "</HEAD>\n" 257 "<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n" 258 "<P> </P>\n<PRE>"; 259 260 const char* footer_text = "</PRE>\n</BODY>\n\n"; 261 
262 263[endsect] 264 265