各位大哥,有谁知道如何解析一个 URL 字符串?
比如:www.csdn.net:80/abc/abc.htm
我要知道 sitename 是 www.csdn.net,port 是 80,访问的页面是 abc/abc.htm。
小弟最近正用WinHttp学着做抓取网站的东东,
不知道哪里有这方面的例子或者资料,
多谢!
比如:www.csdn.net:80/abc/abc.htm
我要知道 sitename 是 www.csdn.net,port 是 80,访问的页面是 abc/abc.htm。
小弟最近正用WinHttp学着做抓取网站的东东,
不知道哪里有这方面的例子或者资料,
多谢!
{
    // Splits lpszAddress into its components with WinINet's InternetCrackUrl
    // and accepts it only when it is a plain HTTP URL: default HTTP port,
    // no user name, no password and no extra info.
    //
    // On success the host name and URL path are copied into m_strHostName
    // and m_strUrlPath and true is returned. On any failure false is
    // returned and neither member is modified.
    //
    // NOTE(review): because of the scheme check below, an address without an
    // explicit "http://" prefix (e.g. "www.example.com:80/a.htm") is expected
    // to be rejected -- confirm against the callers.

    // Output buffers for the components we ask InternetCrackUrl to copy out.
    // Each length handed to the API equals the element count of its buffer.
    TCHAR szUserName[_MAX_PATH];
    TCHAR szPassword[_MAX_PATH];
    TCHAR szHostName[_MAX_PATH];
    TCHAR szUrlPath[_MAX_PATH];
    TCHAR szExtraInfo[_MAX_PATH];

    URL_COMPONENTS url;
    memset(&url, 0, sizeof(url));
    url.dwStructSize = sizeof(url);

    // The scheme text is deliberately not requested (lpszScheme and
    // dwSchemeLength stay zero); the numeric url.nScheme is used instead.
    url.lpszHostName      = szHostName;
    url.dwHostNameLength  = _MAX_PATH;
    url.lpszUserName      = szUserName;
    url.dwUserNameLength  = _MAX_PATH;
    url.lpszPassword      = szPassword;
    url.dwPasswordLength  = _MAX_PATH;
    url.lpszUrlPath       = szUrlPath;
    url.dwUrlPathLength   = _MAX_PATH;
    url.lpszExtraInfo     = szExtraInfo;
    url.dwExtraInfoLength = _MAX_PATH;

    if (!InternetCrackUrl(lpszAddress, 0, 0, &url))
        return false;

    // Only HTTP is handled. Comparing the scheme id rather than calling
    // strcmp() on the TCHAR scheme text keeps this correct in both ANSI and
    // UNICODE builds and does not depend on the letter case of the scheme.
    if (url.nScheme != INTERNET_SCHEME_HTTP)
        return false;

    if (url.nPort != INTERNET_DEFAULT_HTTP_PORT)
        return false; // Not supported yet.

    // Verify that none of the other bits got filled in.
    if (szUserName[0] || szPassword[0] || szExtraInfo[0])
        return false;

    m_strHostName = szHostName;
    m_strUrlPath = szUrlPath;
    return true;
}