using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data.OleDb;
using System.Data;
using System.Text.RegularExpressions;
using System.Collections;
using System.IO;

namespace ConsoleApplication8
{
    /// <summary>
    /// Console sample that prints the href target of each anchor in a fixed HTML fragment.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Scans the sample string for <c>href=content...</c> attributes and writes
        /// each captured target on its own line.
        /// </summary>
        public static void Main()
        {
            const string html = "<a href=content_1.htm>, <a href=content_2.htm>";
            Regex hrefPattern = new Regex(@"href=(?<href>content[^>]*)");

            // Walk the matches one at a time; each hit exposes the named group "href".
            Match current = hrefPattern.Match(html);
            while (current.Success)
            {
                Console.WriteLine(current.Groups["href"].Value);
                current = current.NextMatch();
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data.OleDb;
using System.Data;
using System.Text.RegularExpressions;
using System.Collections;
using System.IO;

namespace ConsoleApplication8
{
    /// <summary>
    /// Console sample that prints the href target and link text of each anchor
    /// in a fixed HTML fragment.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Matches every <c>href=content...&gt;text</c> pair in the sample string and
        /// writes "href title" on one line per anchor.
        /// </summary>
        public static void Main()
        {
            string str = "<a href=content_1.htm>title1 </a>,<a href=content_2.htm>title2 </a>";
            Regex re = new Regex(@"href=(?<href>content[^>]*)>(?<title>[^<]*)");

            foreach (Match m in re.Matches(str))
            {
                string href = m.Groups["href"].Value;

                // The title group runs up to the next '<', so it picks up the blank
                // before "</a>"; trim it so only the link text itself is printed.
                string title = m.Groups["title"].Value.Trim();

                Console.WriteLine("{0} {1}", href, title);
            }
        }
    }
}
(?is)(?<=<a href=)\s*content[^\s>]*
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data.OleDb;
using System.Data;
using System.Text.RegularExpressions;
using System.Collections;
using System.IO;
namespace ConsoleApplication8
{
    // The using list above this namespace does not import System, so bring it
    // into scope here; Console would otherwise be unresolved.
    using System;

    /// <summary>
    /// Console sample that prints the href target of each anchor in a fixed HTML fragment.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Finds every <c>href=content...</c> attribute in the sample string and
        /// writes the captured target on its own line.
        /// </summary>
        public static void Main()
        {
            string str = "<a href=content_1.htm>, <a href=content_2.htm>";
            Regex re = new Regex(@"href=(?<href>content[^>]*)");
            MatchCollection mc = re.Matches(str);

            foreach (Match m in mc)
            {
                Console.WriteLine(m.Groups["href"].Value);
            }
        }
    }
}
<a href=content_2.htm>title2</a> 也就是说,想把 url 和 title 都取出来,请赐教。
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data.OleDb;
using System.Data;
using System.Text.RegularExpressions;
using System.Collections;
using System.IO;
namespace ConsoleApplication8
{
    // The using list above this namespace does not import System, so bring it
    // into scope here; Console would otherwise be unresolved.
    using System;

    /// <summary>
    /// Console sample that prints the href target and link text of each anchor
    /// in a fixed HTML fragment.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Matches every <c>href=content...&gt;text</c> pair in the sample string and
        /// writes "href title" on one line per anchor.
        /// </summary>
        public static void Main()
        {
            string str = "<a href=content_1.htm>title1 </a>,<a href=content_2.htm>title2 </a>";
            Regex re = new Regex(@"href=(?<href>content[^>]*)>(?<title>[^<]*)");
            MatchCollection mc = re.Matches(str);

            foreach (Match m in mc)
            {
                string href = m.Groups["href"].Value;

                // The title group runs up to the next '<', so it picks up the blank
                // before "</a>"; trim it so only the link text itself is printed.
                string title = m.Groups["title"].Value.Trim();

                Console.WriteLine("{0} {1}", href, title);
            }
        }
    }
}
把:content_1.htm,title1和content_2.htm,title2都取出来
<' '*a' '+href' '*=' '*content.*\.htm
使用 lex 生成代码即可
\<" "*a" "+href" "*\=" "*content[A-Za-z0-9_]*"."htm
已经使用 Parser Generator 2 验证过.
test.l 如下
%{
/****************************************************************************
ex_1.l
ParserWizard generated Lex file.

Date: January 8, 2010
****************************************************************************/
#include <stdio.h>
%}

/////////////////////////////////////////////////////////////////////////////
// declarations section

// place any declarations here

// Earlier attempts at the anchor pattern, kept for reference. They wrap text
// in single quotes, which lex treats as ordinary characters, not as quoting.
//content_string \<" "*a" "+'href'" "*'='" "*content.*\.htm
//content_string \<" "*a" "+'href'

// Helper macros. Only content_string is referenced by a rule below; the rest
// are used solely by the commented-out variants.
aaa [0-9]
character [A-Za-z]
// Literal underscore: double quotes (not single quotes) quote text in lex.
low_line "_"
// One or more digits: a macro must be referenced as {aaa}; a bare "aaa+"
// would match the literal letters instead.
number {aaa}+
my_id ({aaa}|{character}|{low_line})+
//content_string \<" "*a" "+href" "*\=" "*content{my_id}\.htm
// Literal dot: an unquoted '.' would match any character.
dot "."
//{dot}htm

// "<", optional spaces, "a", one or more spaces, "href", optional spaces, "=",
// optional spaces, then "content" + [A-Za-z0-9_]* + ".htm".
content_string \<" "*a" "+href" "*\=" "*content[A-Za-z0-9_]*"."htm

%%

/////////////////////////////////////////////////////////////////////////////
// rules section

// place your Lex rules here

{content_string} {
	/* Echo every matched "<a href=content....htm" prefix between ## markers. */
	printf("\n## %s ##\n",yytext);
}

%%

/////////////////////////////////////////////////////////////////////////////
// programs section

/* Entry point: hand control to the generated scanner and return its result. */
int main(void)
{
	return yylex();
}
生成的C 代码如下:
/****************************************************************************
* ex_1.c
* C source file generated from ex_1.l.
*
* Date: 01/08/10
* Time: 19:41:24
*
* ALex Version: 2.07
****************************************************************************/

/* NOTE: each preprocessor directive below sits on its own line, as C requires.
 * The #line directives come from the generator and only affect the file/line
 * reported in diagnostics. */

#include <yylex.h>

/* namespaces */
#if defined(__cplusplus) && defined(YYSTDCPPLIB)
using namespace std;
#endif
#if defined(__cplusplus) && defined(YYNAMESPACE)
using namespace yl;
#endif

#define YYFASTLEXER

#line 1 ".\\ex_1.l"

/****************************************************************************
ex_1.l
ParserWizard generated Lex file.

Date: January 8, 2010
****************************************************************************/
#include <stdio.h>

#line 34 "ex_1.c"
/* repeated because of possible precompiled header */
#include <yylex.h>

/* namespaces */
#if defined(__cplusplus) && defined(YYSTDCPPLIB)
using namespace std;
#endif
#if defined(__cplusplus) && defined(YYNAMESPACE)
using namespace yl;
#endif

#define YYFASTLEXER

#include ".\ex_1.h"

/* Buffer sizing defaults; each can be overridden before this point. */
#ifndef YYTEXT_SIZE
#define YYTEXT_SIZE 100
#endif
#ifndef YYUNPUT_SIZE
#define YYUNPUT_SIZE YYTEXT_SIZE
#endif
#ifndef YYTEXT_MAX
#define YYTEXT_MAX 0
#endif
#ifndef YYUNPUT_MAX
#define YYUNPUT_MAX YYTEXT_MAX
#endif

/* yytext */
static char YYNEAR yysatext[(YYTEXT_SIZE) + 1]; /* extra char for \0 */
char YYFAR *YYNEAR YYDCDECL yystext = yysatext;
char YYFAR *YYNEAR YYDCDECL yytext = yysatext;
int YYNEAR YYDCDECL yystext_size = (YYTEXT_SIZE);
int YYNEAR YYDCDECL yytext_size = (YYTEXT_SIZE);
int YYNEAR YYDCDECL yytext_max = (YYTEXT_MAX);

/* yystatebuf */
#if (YYTEXT_SIZE) != 0
static int YYNEAR yysastatebuf[(YYTEXT_SIZE)];
int YYFAR *YYNEAR YYDCDECL yysstatebuf = yysastatebuf;
int YYFAR *YYNEAR YYDCDECL yystatebuf = yysastatebuf;
#else
int YYFAR *YYNEAR YYDCDECL yysstatebuf = NULL;
int YYFAR *YYNEAR YYDCDECL yystatebuf = NULL;
#endif

/* yyunputbuf */
#if (YYUNPUT_SIZE) != 0
static int YYNEAR yysaunputbuf[(YYUNPUT_SIZE)];
int YYFAR *YYNEAR YYDCDECL yysunputbufptr = yysaunputbuf;
int YYFAR *YYNEAR YYDCDECL yyunputbufptr = yysaunputbuf;
#else
int YYFAR *YYNEAR YYDCDECL yysunputbufptr = NULL;
int YYFAR *YYNEAR YYDCDECL yyunputbufptr = NULL;
#endif
int YYNEAR YYDCDECL yysunput_size = (YYUNPUT_SIZE);
int YYNEAR YYDCDECL yyunput_size = (YYUNPUT_SIZE);
int YYNEAR YYDCDECL yyunput_max = (YYUNPUT_MAX);

/* backwards compatibility with lex */

/* If the user supplied an `input` macro, yyinput forwards to it; otherwise
 * `input` becomes an alias for yyinput. */
#ifdef input
#ifdef YYPROTOTYPE
int YYCDECL yyinput(void)
#else
int YYCDECL yyinput()
#endif
{
	return input();
}
#else
#define input yyinput
#endif

/* Same arrangement for `output` / yyoutput. */
#ifdef output
#ifdef YYPROTOTYPE
void YYCDECL yyoutput(int ch)
#else
void YYCDECL yyoutput(ch)
int ch;
#endif
{
	output(ch);
}
#else
#define output yyoutput
#endif

/* Same arrangement for `unput` / yyunput. */
#ifdef unput
#ifdef YYPROTOTYPE
void YYCDECL yyunput(int ch)
#else
void YYCDECL yyunput(ch)
int ch;
#endif
{
	unput(ch);
}
#else
#define unput yyunput
#endif

#ifndef YYNBORLANDWARN
#ifdef __BORLANDC__
#pragma warn -rch /* <warning: unreachable code> off */
#endif
#endif

/* Runs the user action for rule number `action`.
 * Action 1 is the single rule from ex_1.l: print the matched text (yytext)
 * between "##" markers. Any other action number trips yyassert(0).
 * yyreturnflg is set to YYTRUE on entry and reset to YYFALSE before the
 * function returns 0. */
#ifdef YYPROTOTYPE
int YYCDECL yylexeraction(int action)
#else
int YYCDECL yylexeraction(action)
int action;
#endif
{
	yyreturnflg = YYTRUE;
	switch (action) {
	case 1:
		{
#line 33 ".\\ex_1.l"
			printf("\n## %s ##\n",yytext);
#line 157 "ex_1.c"
		}
		break;
	default:
		yyassert(0);
		break;
	}
	yyreturnflg = YYFALSE;
	return 0;
}

#ifndef YYNBORLANDWARN
#ifdef __BORLANDC__
#pragma warn .rch /* <warning: unreachable code> to the old state */
#endif
#endif
YYCONST yymatch_t YYNEARFAR YYBASED_CODE YYDCDECL yymatch[] = {
0
};int YYNEAR YYDCDECL yytransitionmax = 85;
YYCONST yytransition_t YYNEARFAR YYBASED_CODE YYDCDECL yytransition[] = {
{ 0, 0 },
{ 18, 17 },
{ 7, 6 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 10, 10 },
{ 3, 3 },
{ 8, 7 },
{ 9, 8 },
{ 9, 9 },
{ 3, 1 },
{ 5, 4 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 10, 9 },
{ 12, 11 },
{ 13, 12 },
{ 14, 13 },
{ 17, 17 },
{ 15, 14 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 16, 15 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 17 },
{ 17, 16 },
{ 4, 3 },
{ 11, 10 },
{ 6, 5 },
{ 19, 18 },
{ 20, 19 },
{ 21, 20 }
};YYCONST yystate_t YYNEARFAR YYBASED_CODE YYDCDECL yystate[] = {
{ 0, 0, 0 },
{ 0, -42, 0 },
{ 1, 0, 0 },
{ 0, -18, 0 },
{ 0, -13, 0 },
{ 4, -23, 0 },
{ 0, -112, 0 },
{ 0, -86, 0 },
{ 0, -86, 0 },
{ 0, -15, 0 },
{ 0, -19, 0 },
{ 0, -64, 0 },
{ 0, -62, 0 },
{ 0, -67, 0 },
{ 0, -50, 0 },
{ 0, -39, 0 },
{ 0, -38, 0 },
{ 16, -45, 0 },
{ 0, -22, 0 },
{ 0, -33, 0 },
{ 0, -25, 0 },
{ 0, 0, 1 }
};YYCONST yybackup_t YYNEARFAR YYBASED_CODE YYDCDECL yybackup[] = {
0,
0
};#line 36 ".\\ex_1.l"
/////////////////////////////////////////////////////////////////////////////
// programs sectionint main(void)
{
return yylex();
}============================================================
之后经过编译、链接,一切 OK。