下面是我做的一个程序,从一个网站上自动下载图片,其中有一些问题,于是我发了帖子:
http://topic.csdn.net/u/20080513/19/dc137f3e-0157-40cb-8379-6a712696a65b.html
http://topic.csdn.net/u/20080514/15/9679be3a-d491-4b25-a0c9-17a546b68389.html
这是一个支持MFC的控制台程序,所以把下面的代码拷到您的新建工程就应该直接运行了.
主要请大家解决如下问题:
1、为什么系统比较忙时出现如下问题:
在存取 XXX 时发生共享违例。
2、UTF-8的编码转换问题,网上包括CSDN上有一些转换太麻烦,我用的是API,但是,转换的结尾有点问题,有没有更可靠的办法?代码在我注释了“&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&”的地方
3、个人水平有限,这个程序还有哪些不足,请大家指出
助人乃快乐之本,于人于己益无穷。
问题结束后,一并结分!
http://topic.csdn.net/u/20080513/19/dc137f3e-0157-40cb-8379-6a712696a65b.html
http://topic.csdn.net/u/20080514/15/9679be3a-d491-4b25-a0c9-17a546b68389.html
这是一个支持MFC的控制台程序,所以把下面的代码拷到您的新建工程就应该直接运行了.
主要请大家解决如下问题:
1、为什么系统比较忙时出现如下问题:
在存取 XXX 时发生共享违例。
2、UTF-8的编码转换问题,网上包括CSDN上有一些转换太麻烦,我用的是API,但是,转换的结尾有点问题,有没有更可靠的办法?代码在我注释了“&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&”的地方
3、个人水平有限,这个程序还有哪些不足,请大家指出
助人乃快乐之本,于人于己益无穷。
问题结束后,一并结分!
#include "DownFromAsiaSex.h"
#include <afxinet.h>
#include <afxmt.h>
// #define _UNICODE
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif/////////////////////////////////////////////////////////////////////////////
// The one and only application objectCWinApp theApp;using namespace std;
TCHAR strFileBasePath[] = _T("D:\\清纯唯美\\");
CString html_log = CString(strFileBasePath) + "html_log.txt";
TCHAR strBaseURL[] = _T("http://8.441xx.com/html/11");
TCHAR strBaseURL2[] = _T("http://8.441xx.com/");
CString strFileNotGet = strFileBasePath + CString("not_get.txt");
CString strDirRemoveLog = strFileBasePath + CString("dir_remove.txt");
CString strPageLog = strFileBasePath + CString("page_log.txt");
CString strFileException = strFileBasePath + CString("file_exception.txt");
CString strInetException = strFileBasePath + CString("inet_exception.txt");int pic_index = 1;
int pic_dir = 1;
CString strFileDir;
CString file_path;
CString good_url;
CString strCookie;
CString strCokkieURL = _T("erosya.h.fc2.com");
CString strFind = "href=\"im/titi/";
int dir_len = 8;
int beg_page = 42;
CString JEPG_header = "image/jpeg, image/jpg";
CString HTML_header = "text/html";
CCriticalSection cs_cout;
CCriticalSection cs_pass_parm;
CCriticalSection cs_write_file;
int sem_count = 8;
CSemaphore sem(sem_count, sem_count);
CEvent evn;
CSingleLock singleLock(&evn);

// Reports a file exception on the console, appends it to the file-exception
// log (strFileException) and deletes the exception object.
//
// Locking contract: every caller in this file throws while it still holds
// cs_write_file, so this handler releases that lock on the caller's behalf
// before doing anything else. Do not call it without holding cs_write_file.
//
// @param e  Exception to report; ownership passes to this function.
void OutputException(CFileException* e)
{
    cs_write_file.Unlock();
    if (e == NULL)
        return;

    TCHAR strMsg[256] = { 0 };
    e->GetErrorMessage(strMsg, 256);

    cs_cout.Lock();
    cout << "文件异常:" << strMsg << endl;
    cs_cout.Unlock();

    cs_write_file.Lock();
    try
    {
        // Declared inside the try block so that stack unwinding closes the
        // file before the lock is released below.
        CStdioFile file;
        // Writing to a file that failed to open would fault with the lock
        // still held, so the log entry is skipped when the open fails.
        if (file.Open(strFileException, CFile::modeCreate|CFile::modeWrite|CFile::modeNoTruncate))
        {
            file.Seek(0, CFile::end);
            file.WriteString(e->m_strFileName + "::" + CString(strMsg) + "\n");
            file.Close();
        }
    }
    catch (CFileException* pLogError)
    {
        // The failure is already on the console; a second failure while
        // logging it must not escape with the lock held.
        pLogError->Delete();
    }
    cs_write_file.Unlock();

    e->Delete();
}
// Reports an Internet exception on the console, appends it together with the
// offending URL to the Internet-exception log (strInetException) and deletes
// the exception object.
//
// Unlike OutputException, this function does not expect cs_write_file to be
// held on entry; it takes and releases the lock itself.
//
// @param e    Exception to report; ownership passes to this function.
// @param url  URL that was being fetched when the exception was raised.
void OutputException2(CInternetException* e, CString& url)
{
    if (e == NULL)
        return;

    TCHAR strMsg[256] = { 0 };
    e->GetErrorMessage(strMsg, 256);

    cs_cout.Lock();
    cout << "读INTERNET异常:" << strMsg << endl;
    cs_cout.Unlock();

    cs_write_file.Lock();
    try
    {
        // Declared inside the try block so that stack unwinding closes the
        // file before the lock is released below.
        CStdioFile file;
        // Skip the log entry when the file cannot be opened instead of
        // writing to a closed file with the lock held.
        if (file.Open(strInetException, CFile::modeCreate|CFile::modeWrite|CFile::modeNoTruncate))
        {
            file.Seek(0, CFile::end);
            // "::" separates URL and message, matching OutputException.
            file.WriteString(url + "::" + CString(strMsg) + "\n");
            file.Close();
        }
    }
    catch (CFileException* pLogError)
    {
        pLogError->Delete();
    }
    cs_write_file.Unlock();

    e->Delete();
}
BOOL GetFromWeb(CString& url, CString& szHeader, char** ppBuffer, int* pLen = NULL, int nCalls=0, int nReads = 0/*, CHttpFile** ppHttpFile = NULL*/)
{
cs_cout.Lock();
cout << "GetFromWeb连接:" << (LPCTSTR)url << endl;
cs_cout.Unlock();
CString strServerName, strObject;
INTERNET_PORT inet_port;
DWORD dwServiceType, dwHttpRequestFlags = INTERNET_FLAG_NO_AUTO_REDIRECT|INTERNET_FLAG_RELOAD;
BOOL bOK = AfxParseURL(url, dwServiceType, strServerName, strObject, inet_port);
if(!bOK || dwServiceType != INTERNET_SERVICE_HTTP)
{
cs_cout.Lock();
cout << "URL出错" <<endl;
cs_cout.Unlock();
return FALSE;
} CInternetSession session;//("Outlook", 1, PRE_CONFIG_INTERNET_ACCESS, NULL,
//INTERNET_INVALID_PORT_NUMBER, 0);
CHttpConnection *http_server = NULL;
CHttpFile *pHttpFile = NULL;
try
{
http_server = session.GetHttpConnection(strServerName, inet_port);
pHttpFile = http_server->OpenRequest(CHttpConnection::HTTP_VERB_GET, strObject, NULL, 1, NULL, NULL, dwServiceType);
pHttpFile->AddRequestHeaders(szHeader);
pHttpFile->SendRequest();
}
catch (CInternetException* e)
{
TCHAR strMsg[256];
e->GetErrorMessage(strMsg, 255);
cs_cout.Lock();
cout << "异常:" << strMsg << "\t地址是" << (LPCTSTR)url << endl;
cs_cout.Unlock();
if(pHttpFile != NULL)
{
pHttpFile->Close();
delete pHttpFile;
}
if(http_server != NULL)
{
http_server->Close();
delete http_server;
}
session.Close();
if(nCalls >= 8)
{
cs_cout.Lock();
cout<<"异常次数达到8次,放弃连接"<<endl;
cs_cout.Unlock();
return FALSE;
}
else
{
return GetFromWeb(url, szHeader, ppBuffer, pLen, nCalls+1);
}
}
catch (CFileException* e)
{
TCHAR strMsg[256];
e->GetErrorMessage(strMsg, 255);
cs_cout.Lock();
cout << "异常:" << strMsg;
cs_cout.Unlock();
return FALSE;
}
int length = pHttpFile->GetLength();
CString strLen;
pHttpFile->QueryInfo(HTTP_QUERY_CONTENT_LENGTH, strLen);
length = ::atoi(LPCTSTR(strLen));
if(length <= 0)
{
cs_cout.Lock();
cout << "GetFromWeb::文件长度错误:length" << length << ", strLen" << (LPCTSTR)strLen << endl;
cs_cout.Unlock();
return FALSE;
}
char* szBuffer = new char[length+1];
try
{
pHttpFile->Read(szBuffer, length);
}
catch (CInternetException* e)
{
OutputException2(e, url);
if(ppBuffer != NULL)
{
*ppBuffer = szBuffer;
}
else
{
delete[] szBuffer;
}
if(pLen!=NULL) *pLen = length;
pHttpFile->Close();
http_server->Close();
if(pHttpFile != NULL) delete pHttpFile;
if(http_server != NULL) delete http_server;
if(nReads >= 3)
{
cs_cout.Lock();
cout<<"从网络上读该数据达到3次,放弃操作"<<endl;
cs_cout.Unlock();
return FALSE;
}
else
{
return GetFromWeb(url, szHeader, ppBuffer, pLen, nCalls, nReads+1);
}
}
if(ppBuffer != NULL)
{
*ppBuffer = szBuffer;
}
else
{
delete[] szBuffer;
}
if(pLen!=NULL) *pLen = length;
pHttpFile->Close();
http_server->Close();
if(pHttpFile != NULL) delete pHttpFile;
if(http_server != NULL) delete http_server;
session.Close();
return TRUE;
BOOL bRet = TRUE;
return bRet;
}
{
CString strStart = _T("<div class=\"list\">");
CString strEnd = _T("</div>");
int nStart = url.Find(strStart);
if(nStart < 0) return;
int nEnd = url.Find(strEnd, nStart);
if(nEnd < 0) return;
CString strLinkStart = _T("href=\"");
CString strLinkEnd = _T("<");
CString strTitleStart = _T(">");
int nFind = url.Find(strLinkStart, nStart + strStart.GetLength());
while(nFind > 0 && nFind<nEnd)
{
int nLinkStartPos = nFind + strLinkStart.GetLength();
int nLinkEndPos = url.Find(_T("\""), nLinkStartPos);
CString strUrl = url.Mid(nLinkStartPos+1, nLinkEndPos - nLinkStartPos-1);
int nTitleStartPos = url.Find(strTitleStart, nLinkEndPos+1);
int nTitileEndPos = url.Find(strLinkEnd, nTitleStartPos+1);
CString strTitle = url.Mid(nTitleStartPos+1, nTitileEndPos - nTitleStartPos-1);
int lll = strTitle.GetLength();
wchar_t str1[3000];
MultiByteToWideChar(CP_UTF8, 0, strTitle.GetBuffer(0), strTitle.GetLength(), str1, strTitle.GetLength());
char str2[3000];
WideCharToMultiByte(CP_ACP, 0, str1, strTitle.GetLength()*2, str2, 150, NULL, FALSE);
str2[strTitle.GetLength()] = '\0';
for(int k=0; k<lll; k++)//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
{
if(str2[k] == 63)
{
str2[k] = 0;
break;
}
}//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
CString strTemp(str2), strTitle2;
strTitle2 = strTemp;
int aaa = strTemp.Find("<");
if(aaa>0)
{
strTitle2 = strTemp.Left(aaa);
}
aaa = strTitle2.Find("/");
if(aaa>0)
{
strTitle2 = strTitle2.Left(aaa);
}
aaa = strTitle2.Find("?");
if(aaa>0)
{
strTitle2 = strTitle2.Left(aaa);
}
aaa = strTitle2.Find("]");
if(aaa>0)
{
strTitle2 = strTitle2.Left(aaa+1);
}
aTitles.Add(strTitle2);
aURLs.Add(strBaseURL2 + strUrl);
nFind = url.Find(strLinkStart, nTitileEndPos + strLinkEnd.GetLength());
}
CStdioFile f;
try
{
CFileException *fe = new CFileException;
cs_write_file.Lock();
if(!f.Open(html_log, CFile::modeWrite, fe))
throw(fe);
f.Seek(0, CFile::end);
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
for(int i=0; i<aTitles.GetSize(); i++)
{
cs_cout.Lock();
cout << "GetPageURL:" <<(LPCTSTR)aTitles.GetAt(i) << endl;
cout << "GetPageURL:" <<aURLs.GetAt(i).GetBuffer(0) <<endl;
cs_cout.Unlock();
try
{
cs_write_file.Lock();
f.WriteString(aTitles.GetAt(i) + "\n");
f.WriteString(aURLs.GetAt(i) + "\n");
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
}
cs_write_file.Lock();
f.Close();
cs_write_file.Unlock();
}
// Deletes a file or a whole folder tree through the shell, without
// confirmation or error dialogs, and records the outcome in strDirRemoveLog.
//
// @param lpszPath  Path to delete.
// @return TRUE when SHFileOperation reported success. A failure to write the
//         log entry is reported through OutputException and does not change
//         the return value.
BOOL DelTree(LPCTSTR lpszPath)
{
    // SHFILEOPSTRUCT::pFrom is a list of names and must end with two
    // terminating nulls; a plain C string is not enough.
    CString strFrom(lpszPath);
    const int nLen = strFrom.GetLength();
    LPTSTR pszFrom = strFrom.GetBuffer(nLen + 2);
    pszFrom[nLen] = _T('\0');
    pszFrom[nLen + 1] = _T('\0');

    SHFILEOPSTRUCT FileOp;
    memset(&FileOp, 0, sizeof(FileOp));
    FileOp.wFunc = FO_DELETE;
    FileOp.pFrom = pszFrom;
    FileOp.fFlags = FOF_NOCONFIRMATION|FOF_NOERRORUI;
    const int nRet = SHFileOperation(&FileOp);
    strFrom.ReleaseBuffer(nLen);

    CString str;
    if(nRet == 0)
    {
        str = CString("删除:") + lpszPath + "\n";
    }
    else
    {
        str = CString("不能删除:") + lpszPath + "\n";
    }

    // OutputException expects cs_write_file to be held when it is called and
    // releases it itself, so every path out of this section unlocks exactly
    // once (the original returned with the lock held when Open failed).
    cs_write_file.Lock();
    try
    {
        CStdioFile file;
        CFileException fe;
        if(!file.Open(strDirRemoveLog, CFile::modeCreate|CFile::modeNoTruncate|CFile::modeWrite, &fe))
            AfxThrowFileException(fe.m_cause, fe.m_lOsError, fe.m_strFileName);
        file.Seek(0, CFile::end);
        file.WriteString(str);
        file.Close();
        cs_write_file.Unlock();
    }
    catch (CFileException* e)
    {
        OutputException(e);
    }
    return nRet==0;
}
// Collects the image URLs of one album page and records them in
// <strSavePath>\pic_url.txt.
//
// @param url          HTML of the album page.
// @param url2         URL of the album page; written as the first line of
//                     pic_url.txt.
// @param aURLs        Receives the value of every src="..." attribute found
//                     inside the first <div class="content"> element.
// @param strSavePath  Album folder (expected to exist already).
//
// pic_url.txt is rewritten on every call. When the content element is not
// found, only url2 is written and aURLs is left untouched.
void GetJpegURL(CString& url, CString& url2, CStringArray& aURLs, CString& strSavePath)
{
    CString strStart = _T("<div class=\"content\">");
    CString strEnd = _T("</div>");
    CString strLinkStart = _T("src=\"");

    const int nStart = url.Find(strStart);
    const int nEnd = (nStart < 0) ? -1 : url.Find(strEnd, nStart);
    const BOOL bFound = (nEnd >= 0);
    if(bFound)
    {
        int nFind = url.Find(strLinkStart, nStart + strStart.GetLength());
        while(nFind > 0 && nFind < nEnd)
        {
            const int nLinkStartPos = nFind + strLinkStart.GetLength();
            // The attribute value ends at its closing quote, whatever follows
            // it in the tag.
            const int nLinkEndPos = url.Find(_T("\""), nLinkStartPos);
            // Without this check a missing quote would restart the search at
            // the top of the page and the loop would never finish.
            if(nLinkEndPos < 0) break;
            aURLs.Add(url.Mid(nLinkStartPos, nLinkEndPos - nLinkStartPos));
            nFind = url.Find(strLinkStart, nLinkEndPos + 1);
        }
    }

    int i;
    if(bFound)
    {
        for(i = 0; i < aURLs.GetSize(); i++)
        {
            cs_cout.Lock();
            cout << (LPCTSTR)aURLs.GetAt(i) << endl;
            cs_cout.Unlock();
        }
    }

    // Write the whole file in one locked open/write/close sequence instead of
    // reopening it for every URL. OutputException expects cs_write_file to be
    // held when it is called and releases it itself.
    cs_write_file.Lock();
    try
    {
        CStdioFile f1;
        CFileException fe;
        if(!f1.Open(strSavePath+"\\pic_url.txt", CFile::modeCreate|CFile::modeWrite, &fe))
            AfxThrowFileException(fe.m_cause, fe.m_lOsError, fe.m_strFileName);
        f1.WriteString(url2 + "\n");
        if(bFound)
        {
            for(i = 0; i < aURLs.GetSize(); i++)
                f1.WriteString(aURLs.GetAt(i) + "\n");
        }
        f1.Close();
        cs_write_file.Unlock();
    }
    catch (CFileException* e)
    {
        OutputException(e);
    }
}
{
CString url;
CString title;
};
UINT /*CALLBACK */FunWritePic(PVOID p)
{
ThreadParm parm, *pParm;
// memcpy(&parm, p, sizeof(ThreadParm));
pParm = (ThreadParm*)p;
parm.url = pParm->url;
parm.title = pParm->title;
pParm = &parm;
// TRACE("FunWritePic::%s, %s\n", parm.title, parm.url);
cs_pass_parm.Unlock();
// singleLock.Unlock();
pParm = &parm;
char* pBuffer2;
if(!GetFromWeb(pParm->url, HTML_header, &pBuffer2))
{
sem.Unlock();
cs_cout.Lock();
cout << "不能得到URL:" << (LPCTSTR)pParm->url <<endl << "标题:" << (LPCTSTR)pParm->title <<endl;
cs_cout.Unlock();
CStdioFile f_not_get;
try
{
cs_write_file.Lock();
CFileException *fe = new CFileException;
if(!f_not_get.Open(strFileNotGet, CFile::modeWrite, fe))
throw(fe);
f_not_get.Seek(0, CFile::end);
f_not_get.WriteString(pParm->url+"\n");
f_not_get.WriteString(pParm->title+"\n");
f_not_get.Close();
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
// RemoveDirectory(strSavePath);
return 0;
}
CString pageUrl2(pBuffer2);
CString strSavePath = strFileBasePath + pParm->title;
CreateDirectory(strSavePath, NULL);
CStringArray aPic;
GetJpegURL(pageUrl2, pParm->url, aPic, strSavePath);
CStdioFile fileSaveLog;
CString strSaveLog;
strSaveLog = strSavePath +CString(_T("\\save_log.txt"));
try
{
cs_write_file.Lock();
CFileException *fe = new CFileException;
if(!fileSaveLog.Open(strSaveLog, CFile::modeCreate|CFile::modeWrite, fe))
throw(fe);
fileSaveLog.Close();
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
int write_count = 0;
for(int k=0; k<aPic.GetSize(); k++)
{
char* pBuffer3 = NULL;
int len;
if(!GetFromWeb(aPic.GetAt(k), HTML_header, &pBuffer3, &len))
{
try
{
cs_write_file.Lock();
CFileException *fe = new CFileException;
if(!fileSaveLog.Open(strSaveLog, CFile::modeWrite, fe))
throw(fe);
fileSaveLog.Seek(0, CFile::end);
fileSaveLog.WriteString("不能保存 " + aPic.GetAt(k) + "\n");
fileSaveLog.Close();
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
continue;
}
if(pBuffer3!=NULL && (BYTE)(pBuffer3[0])==0xff && (BYTE)(pBuffer3[1])==0xd8 && (BYTE)(pBuffer3[2])==0xff)
{
CFile file;
CString picFile;
picFile.Format("%s\\%d.jpg", strSavePath.GetBuffer(0), k+1);
try
{
cs_write_file.Lock();
CFileException *fe = new CFileException;
if(!file.Open(picFile, CFile::modeCreate|CFile::modeWrite, fe))
throw(fe);
file.Write(pBuffer3, len);
file.Close();
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
CString strLog = "保存 " + aPic.GetAt(k) + " 到 " + picFile + "\n";
cs_cout.Lock();
cout << (LPCTSTR)strLog;
cs_cout.Unlock();
try
{
cs_write_file.Lock();
CFileException *fe = new CFileException;
CStdioFile fileSaveLog2;
if(!fileSaveLog2.Open(strSaveLog, CFile::modeWrite, fe))
throw(fe);
fileSaveLog2.Seek(0, CFile::end);
fileSaveLog2.WriteString(strLog);
fileSaveLog2.Close();
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
write_count++;
}
delete[] pBuffer3;
pBuffer3 = NULL;
}
if(write_count == 0)
{
cs_cout.Lock();
cout << "不能得到图片 URL:" << (LPCTSTR)pParm->url <<endl << "标题:" << (LPCTSTR)pParm->title <<endl;
cs_cout.Unlock();
CStdioFile f_not_get;
try
{
cs_write_file.Lock();
CFileException *fe = new CFileException;
if(!f_not_get.Open(strFileNotGet, CFile::modeWrite, fe))
throw(fe);
f_not_get.Seek(0, CFile::end);
f_not_get.WriteString(pParm->url+"\n");
f_not_get.WriteString(pParm->title+"\n");
f_not_get.Close();
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
// RemoveDirectory(strSavePath);
cs_cout.Lock();
cout << "删除文件夹" << (LPCTSTR)strSavePath <<endl;
cs_cout.Unlock();
DelTree(strSavePath);
}
delete[] pBuffer2;
sem.Unlock();
return 1;
}
BOOL GetAndSavePic2()
{
HANDLE threads[32];
ThreadParm p;
for(int i=beg_page; i<=73; i++)
{
CString url;
if(i==1)
{
url.Format("%s/", strBaseURL);
}
else
{
url.Format("%s/11_%d.shtml", strBaseURL, i);
}
CStdioFile pageFile;
try
{
cs_write_file.Lock();
CFileException *fe = new CFileException;
if(!pageFile.Open(strPageLog, CFile::modeWrite, fe))
throw(fe);
pageFile.Seek(0, CFile::end);
pageFile.WriteString(url+"\n");
pageFile.Close();
cs_write_file.Unlock();
}
catch (CFileException* e)
{
OutputException(e);
}
char *pBuffer;
if(!GetFromWeb(url, HTML_header, &pBuffer)) continue;
CString pageUrl(pBuffer);
CStringArray aTitles, aURLs;
GetPageURL(pageUrl, aURLs, aTitles);
delete[] pBuffer;
int url_len = aURLs.GetSize();
for(int j=0; j<url_len; j++)
{
if(aURLs.GetAt(j).GetLength()<=0 || aTitles.GetAt(j).GetLength()<=0) continue;
TRACE(aTitles.GetAt(j)+"\n");
p.url = aURLs.GetAt(j);
p.title = aTitles.GetAt(j);
sem.Lock();
cs_pass_parm.Lock();
TRACE("%d, %s, %s\n", j, aURLs.GetAt(j), aTitles.GetAt(j));
/*CWinThread* pThread = */AfxBeginThread(FunWritePic, &p);
}
Sleep(1000);
}
// f.Close();
return TRUE;
}
// Program entry point: initialises MFC, prints the greeting string resource
// and runs the downloader.
//
// @return 0 on success, 1 when MFC could not be initialised (in which case
//         the downloader is not started, because it depends on MFC).
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
    // initialize MFC and print and error on failure
    if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
    {
        // TODO: change error code to suit your needs
        cerr << _T("Fatal Error: MFC initialization failed") << endl;
        return 1;
    }

    CString strHello;
    strHello.LoadString(IDS_HELLO);
    cout << (LPCTSTR)strHello << endl;

    GetAndSavePic2();
    return 0;
}
用调试方式运行程序,当出现错误时选择“重试”或“中断”,看一下停在哪行代码,根据调用堆栈找出自己代码中出错的位置,查看是什么原因,然后再根据出错原因重新调试程序。
2、UTF-8的编码转换问题,网上包括CSDN上有一些转换太麻烦,我用的是API,但是,转换的结尾有点问题,有没有更可靠的办法?代码在我注释了“&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&”的地方
MultiByteToWideChar(CP_UTF8, 0, strTitle.GetBuffer(0), -1, str1, 3000);
char str2[3000];
WideCharToMultiByte(CP_ACP, 0, str1, -1, str2, 3000, NULL, FALSE);
3、个人水平有限,这个程序还有哪些不足,请大家指出
代码实在太长了,看不过来。
2、谢谢
3、理解
在存取 D:\清纯唯美\美丽小酒窝 [11P]\save_log.txt 时发生共享违例。
这个文件是由一个线程(FunWritePic(PVOID p),代码在3楼)进行读写的!现在很少出现这个错误,例如,只有在我玩游戏、CPU占用100%时才容易出现这个错误。