刚开始研究,以前没有接触过 boost。程序是从文本读取数据,分析出 email,但结果不对,请懂的人帮忙修改一下,谢谢。
一段从网页源码数据%20cocolor=#CC0000>Email</font>:[email protected] Add:Rainbow Road, Cha Shan Town, <font color=#CC0000>Dongguan</font> <font color=#CC0000>Co</font>.,<font color=#CC0000>Limited</font> <font color=#CC0000>Email</font>:[email protected] Tel...<br> color=#CC0000>Co</font>.,<font color=#CC0000>Limited</font> <font color=#CC0000>Email</font>: [email protected] Sports Goods <font color=#CC0000>Co</font>.,Ltd</a></h3> <font size=-1> SKYPE:xiaoyue2006 MSN:[email protected] <font color=#CC0000>EMAIL</font>:[email protected]..保存为 abc.txtVC 代码如下
#include <iostream>
#include <cassert>
#include <string>
#include <algorithm>
#include <functional>
#include <vector>
#include <fstream>
#include "boost/regex.hpp"
using namespace std;
using namespace boost;
//".*[\u4e00-\u9fa5]+.*"
//int countxx=0;
string replace_all(string str,const string old_value,const string new_value);
string replace_all(string str,const string old_value,const string new_value)
{
while(true) {
string::size_type pos(0);
if( (pos=str.find(old_value))!=string::npos )
str.replace(pos,old_value.length(),new_value);
else break;
}
return str;
} int main() {
while (true)
{
std::cout << "Enter a filename: \n";
std::string s; std::getline(std::cin, s);
if (s.compare("exit") == 0)
{
break;
}
std::ifstream fin(s.c_str());
if(!fin.is_open())
return false;
s = s + ".xml";
std::ofstream fou(s.c_str());
if(!fin.is_open())
return false;
int nCount = 1;
std::vector<std::string> strvect;
fou << "<config name=\"SDErrCode\">" << std::endl;
while(!fin.eof())
{
std::string inbuf;
getline(fin, inbuf,'\n');
//cout << inbuf<< std::endl;
inbuf=replace_all(inbuf,":"," ");
inbuf=replace_all(inbuf,".."," ");
cout << inbuf<< std::endl;
strvect.push_back(inbuf);
try {
// boost::regex reg("\".*[\u4e00-\u9fa5]+.*\"");
boost::regex reg("(\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*)");
//boost::regex reg("@");
boost::smatch what;
std::string::const_iterator start = inbuf.begin();
std::string::const_iterator end = inbuf.end();
if(boost::regex_search(start,end, what, reg))
{
for(int i=0;i< what.size();i++)
{
//fou << "\t<error name = \"\"\t\t" << "value=\"" << nCount++ << "\" " << "desc="<< what[i].str() << " />" << std::endl;
std::cout <<"email is: "<< what[i].str().c_str()<< std::endl;
}
}
}
catch(const boost::bad_expression& e)
{
std::cout << "That's not a valid regular expression! (Error: " << e.what() << ") Exiting...\n";
}
}
// fou << "</config>" << std::endl;
// std::cout << countxx<< std::endl;
}
getchar();
return 0;
}
程序编译通过,运行后输入文件名 abc.txt,
结果是:
email is: [email protected]
email is: [email protected]
email is:
email is:
email is:
明显不对,
请问错在哪里? 如何修改?
一段从网页源码数据%20cocolor=#CC0000>Email</font>:[email protected] Add:Rainbow Road, Cha Shan Town, <font color=#CC0000>Dongguan</font> <font color=#CC0000>Co</font>.,<font color=#CC0000>Limited</font> <font color=#CC0000>Email</font>:[email protected] Tel...<br> color=#CC0000>Co</font>.,<font color=#CC0000>Limited</font> <font color=#CC0000>Email</font>: [email protected] Sports Goods <font color=#CC0000>Co</font>.,Ltd</a></h3> <font size=-1> SKYPE:xiaoyue2006 MSN:[email protected] <font color=#CC0000>EMAIL</font>:[email protected]..保存为 abc.txtVC 代码如下
#include <iostream>
#include <cassert>
#include <string>
#include <algorithm>
#include <functional>
#include <vector>
#include <fstream>
#include "boost/regex.hpp"
using namespace std;
using namespace boost;
//".*[\u4e00-\u9fa5]+.*"
//int countxx=0;
string replace_all(string str,const string old_value,const string new_value);
string replace_all(string str,const string old_value,const string new_value)
{
while(true) {
string::size_type pos(0);
if( (pos=str.find(old_value))!=string::npos )
str.replace(pos,old_value.length(),new_value);
else break;
}
return str;
} int main() {
while (true)
{
std::cout << "Enter a filename: \n";
std::string s; std::getline(std::cin, s);
if (s.compare("exit") == 0)
{
break;
}
std::ifstream fin(s.c_str());
if(!fin.is_open())
return false;
s = s + ".xml";
std::ofstream fou(s.c_str());
if(!fin.is_open())
return false;
int nCount = 1;
std::vector<std::string> strvect;
fou << "<config name=\"SDErrCode\">" << std::endl;
while(!fin.eof())
{
std::string inbuf;
getline(fin, inbuf,'\n');
//cout << inbuf<< std::endl;
inbuf=replace_all(inbuf,":"," ");
inbuf=replace_all(inbuf,".."," ");
cout << inbuf<< std::endl;
strvect.push_back(inbuf);
try {
// boost::regex reg("\".*[\u4e00-\u9fa5]+.*\"");
boost::regex reg("(\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*)");
//boost::regex reg("@");
boost::smatch what;
std::string::const_iterator start = inbuf.begin();
std::string::const_iterator end = inbuf.end();
if(boost::regex_search(start,end, what, reg))
{
for(int i=0;i< what.size();i++)
{
//fou << "\t<error name = \"\"\t\t" << "value=\"" << nCount++ << "\" " << "desc="<< what[i].str() << " />" << std::endl;
std::cout <<"email is: "<< what[i].str().c_str()<< std::endl;
}
}
}
catch(const boost::bad_expression& e)
{
std::cout << "That's not a valid regular expression! (Error: " << e.what() << ") Exiting...\n";
}
}
// fou << "</config>" << std::endl;
// std::cout << countxx<< std::endl;
}
getchar();
return 0;
}
程序编译通过,运行后输入文件名 abc.txt,
结果是:
email is: [email protected]
email is: [email protected]
email is:
email is:
email is:
明显不对,
请问错在哪里? 如何修改?
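regex_search 每次调用只返回一个匹配,所以要把 if 改成循环:while(boost::regex_search(start, end, what, reg)),并且每次迭代后要把 start 更新为新位置:start = what[0].second;。另外,what[0] 才是整个匹配,what[1]、what[2]…… 是各个括号子表达式的捕获(没有参与匹配时为空),所以只需输出 what[0];原程序里空白的 “email is:” 就是输出了这些空的子匹配造成的。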
譬如string str("------563-----99-0--"); boost::regex reg("(.*?)(\\d{2})", boost::regex::icase);
boost::smatch what; string::const_iterator m = str.begin();
string::const_iterator n = str.end(); while(boost::regex_search(m, n, what, reg))
{
cout << what[0] << endl;
m = what[0].second;
}我想匹配以2位数字结尾的字串;
但上面的代码只能匹配出“------56” 和 “------563-----99”;
而不能匹配 “------563”;
请问有什么办法可以解决吗?
当然,如果每次都是 m+1 的话,比较繁琐;且还要判断当前的what[0].second 是否和上一次的重复;
感谢!
之前回答你的也只是现编译的代码,这才发现有你没考虑到的问题。
像你这次这个问题:
首先,你要明白正则的匹配原则。
你可以看一下以下网址的教程:http://www.cnblogs.com/deerchao/archive/2006/08/24/zhengzhe30fengzhongjiaocheng.html#greedyandlazy
你要明白 (.*?)(\\d{2}) 这样写代表的意义:
(.*) 表示任意字符重复 0 到 N 次(贪婪匹配,尽可能多地匹配);加上一个 ? 变成 (.*?) 后就是懒惰(非贪婪)匹配,即在能使整个表达式匹配成功的前提下重复尽可能少的次数;
后面的 (\\d{2}) 表示连续出现两位数字