c++ – 如何在不写入任何内容的情况下获得 stringstream 的实际最大大小
我正在使用下面的程序处理大型 libpcap 文件.
我对字符串流可以从OS分配的内存的实际最大大小感到困惑. 代码的第一部分是用于处理libpacp文件的程序. 第二部分是测试程序. 环境:Windows 10,VS,符合Win32-Released(32位)模式. 第一部分: #include <fstream> #include <iostream> #include <sstream> #include <string> #include <ctime> #include <cstdio> #define HeaderBytes 24 #define MaxPkgBytes 65544 //65536+8 #define KeepDays 7 #define KeepSeconds (KeepDays*86400) #define StartTimeOffset (-1*86400) // -1 day using namespace std; typedef struct{ int size; char data[MaxPkgBytes]; }pkg; int catoi(const char* ca){ char tmp[4]; int* iptr; for (int i = 0; i < 4; i++){ tmp[i] = ca[3 - i]; } iptr = reinterpret_cast<int*>(tmp); return *iptr; } #ifdef _MSC_VER #include <windows.h> #include <iomanip> wstring str2wstr(const std::string& s) { int len; int slength = (int)s.length() + 1; len = MultiByteToWideChar(CP_ACP,s.c_str(),slength,0); wchar_t* buf = new wchar_t[len]; MultiByteToWideChar(CP_ACP,buf,len); wstring wstr(buf); return wstr; } #endif // _MSC_VER int main(int argc,char** argv){ string inFileName,outFileName; stringstream outBuf; fstream fs_in,fs_out; char buf_char; int buf_int,headercount = 0,curPkgIdx= 0,lastPkgIdx = 1,tmp; bool isBroken = false,isValid; clock_t mytime; unsigned int StartTime = 0,PkgTime; pkg buf_pkg[2]; if (argc != 2){ return 1; } inFileName = argv[1]; fs_in.open(inFileName,ios::binary | ios::in); if (!fs_in){ cout << "Can't open the file: " << inFileName << endl; return 1; } outFileName = inFileName; outFileName.insert(outFileName.rfind('.'),"_integrated"); fs_out.open(outFileName,ios::binary | ios::out); if (!fs_out){ cout << "Can't open the file: " << outFileName << endl; return 1; } int invalidPConuter = 0; long long outBufMaxPos = 0; buf_pkg[0].size = 0; buf_pkg[1].size = 0; mytime = clock(); fs_in.read(buf_pkg[curPkgIdx].data,HeaderBytes); outBuf.write(buf_pkg[curPkgIdx].data,HeaderBytes); if (fs_in){ fs_in.read(buf_pkg[curPkgIdx].data,4); StartTime = catoi(buf_pkg[curPkgIdx].data); StartTime += StartTimeOffset; fs_in.seekg(-4,ios_base::cur); } cout << "start" << 
endl; while (fs_in.get(buf_char)){ fs_in.seekg(-1,ios_base::cur); if (buf_char == -95 ){ //0xa1 fs_in.read(reinterpret_cast<char*>(&buf_int),sizeof(int)); if (buf_int == 0xd4c3b2a1){ //a1b2 c3d4 fs_in.seekg(HeaderBytes-4,ios_base::cur); headercount++; } else fs_in.seekg(-4,ios_base::cur); } else{ fs_in.read(buf_pkg[curPkgIdx].data,16); PkgTime = catoi(buf_pkg[curPkgIdx].data); /*Set isValid*/ if (PkgTime - StartTime < KeepSeconds) isValid = true; else isValid = false; if (isValid){ //last packetage is valid /*store size of packetage*/ buf_pkg[curPkgIdx].size = catoi(buf_pkg[curPkgIdx].data + 8); /*store size of packetage*/ if (buf_pkg[curPkgIdx].size > MaxPkgBytes) isValid = false; } if (isValid) //Pass packet size check { /*read packetage data*/ fs_in.read(buf_pkg[curPkgIdx].data + 16,buf_pkg[curPkgIdx].size); buf_pkg[curPkgIdx].size += 16; /*read packetage data*/ /*write last packetage data*/ outBuf.write(buf_pkg[lastPkgIdx].data,buf_pkg[lastPkgIdx].size); if (static_cast<long long>(outBuf.tellp()) > outBufMaxPos) { outBufMaxPos = static_cast<long long>(outBuf.tellp()); } else if (static_cast<long long>(outBuf.tellp()) == -1) { cout << "outBufMaxPos: " << outBufMaxPos << endl; system("pause"); } if (outBuf.tellp() >= 0x40000000 - MaxPkgBytes) // 1GB { cout << "write" << endl; fs_out << outBuf.rdbuf(); outBuf.str(""); outBuf.clear(); } /*write last packetage data*/ /*swap idx of buffer*/ tmp = curPkgIdx; curPkgIdx = lastPkgIdx; lastPkgIdx = tmp; /*swap idx of buffer*/ } if (!isValid) { ++invalidPConuter; isBroken = true; fs_in.seekg(-buf_pkg[lastPkgIdx].size - 15,ios_base::cur); /*search correct packetage byte by byte*/ int tmpflag = 0; /*Let PkgTime be invalid. 
If packet is invalid because of its size,original PkgTime was valid*/ PkgTime = StartTime + KeepSeconds; while (PkgTime - StartTime >= KeepSeconds && fs_in.read(buf_pkg[curPkgIdx].data,4)){ PkgTime = catoi(buf_pkg[curPkgIdx].data); fs_in.seekg(-3,ios_base::cur); } fs_in.seekg(-1,ios_base::cur); /*search correct packetage byte by byte*/ buf_pkg[lastPkgIdx].size = 0; //reset the size of the invalid packetage } } } fs_in.close(); mytime = clock() - mytime; cout << "Repair pacp: " << mytime << " miniseconds." << endl; cout << "Number of deleted headers: " << headercount << endl; mytime = clock(); if (headercount || isBroken){ fs_out << outBuf.rdbuf(); fs_out.close(); #ifdef _MSC_VER wstring originFileName,newFileName; originFileName = str2wstr(inFileName); newFileName = str2wstr(inFileName.insert(inFileName.rfind("."),"_origin")); int flag = MoveFileExW(originFileName.c_str(),newFileName.c_str(),0); if (!flag) { cout << "fail to rename origin file" << endl; cout << showbase // show the 0x prefix << internal // fill between the prefix and the number << setfill('0'); // fill with 0s cout << "Error code: " << hex << setw(4) << GetLastError() << dec << endl; } else { newFileName = originFileName; originFileName = str2wstr(outFileName); flag = MoveFileExW(originFileName.c_str(),0); if (!flag) { cout << "fail to rename output file" << endl; cout << showbase // show the 0x prefix << internal // fill between the prefix and the number << setfill('0'); // fill with 0s cout << "Error code: " << hex << setw(4) << GetLastError() << dec << endl; } } #endif //_MSC_VER } else { wstring tmpwstr = str2wstr(outFileName); fs_out.close(); if (!DeleteFileW(tmpwstr.c_str())) { cout << "Cannot deleted tmp file (integrated)" << endl; } cout << "The file is completed. Do nothing." << endl; } mytime = clock() - mytime; cout << "Rename file: " << mytime << " miniseconds." 
<< endl; system("pause"); return 0; } 第一部分的伪代码: using namespace std; int main(int argc,char** argv){ //leave over the varibles string inFileName,outFileName; fstream fs_out; char buf_char; int buf_int,PkgTime; pkg buf_pkg[2]; int invalidPConuter = 0; long long outBufMaxPos = 0; //the varibles will be mentioned fstream fs_in; stringstream outBuf; fs_in.read(Header); outBuf.write(Header); if (fs_in){ StartTime = first_packet_time + StartTimeOffset; } while (!fs_in.eof()){ if (a header read from fs_in){ skip the block of header } else{ fs_in.read(packet header); if (time of packet isValid){ check size of packet } if (size and time isValid) { fs_in.read(packet data); outBuf.write(packet data); if(outBuf out of range) { print(max stringstream size) system("pause"); } if (outBuf size >= 1GB) { write outBuf into output file } } if (size or time isNotValid) { find next valid packet byte by byte } } } fs_in.close(); system("pause"); return 0; } 第二部分: #include <iostream> #include <typeinfo> #include <sstream> #include <string> using namespace std; #define testsize (80*1024*1024) int main() { stringstream ss; char* buf = new char[testsize]; int i = 0; memset(buf,'a',testsize); while (i < 30) { ss.write(buf,testsize); cout << ss.tellp()/1024/1024 << endl; ++i; } system("pause"); } 在第一部分中,stringstream的最大大小限制为大约674MB. 但在第二部分中,stringstream的最大大小限制在2GB左右. 为什么他们不同? 如何在不写入任何内容的情况下获得字符串流的实际最大大小? 我搜索过相关问题,但答案对我没有帮助. 解决方法
简短的回答是:除了实际尝试之外,你通常不知道,也无法知道.
操作系统有一个内存池.该池由系统上当前正在执行的所有进程共享(还包括一些不完全属于任何进程的设备驱动程序,但这种区别在这里并不重要). 在典型情况下,池的总大小是未知的,而且通常无法得知.它可能会动态变化,例如在系统中添加或移除磁盘时. 系统上任何一个进程可以使用的池的比例通常也无法得知.其他进程不断启动和停止,这通常伴随着内存的分配和释放,而且许多进程在运行期间也会分配和释放内存. 所有这一切都是动态发生的,所以一次分配尝试可能在这一刻成功,在下一刻失败,稍后又再次成功.即使操作系统提供了(例如)一个函数来告诉你调用它时有多少可用内存,其结果也很可能在返回给调用者之前就已经不再准确. 还有一些硬性限制.一个明显的例子是:32位进程只有4GB的地址空间.试图通过常规方法(例如 new)分配(比如说)8GB内存是不可能成功的. 大小为N的单次分配需要在该地址范围内有N个连续的字节.特别是在进程运行一段时间之后,可用的地址空间(与底层物理内存无关)会趋于碎片化,因此无论有多少可用内存,能够成功的最大单次分配就是可用地址空间中最大碎片的大小. 在某些情况下,还存在“软”限制.例如,在Windows[1]中,您可以创建“作业对象”,并指定在该作业对象中运行的进程所能使用的最大内存.即使物理RAM仍然可用,这也会使分配失败. 因此,在任何特定时刻,能够成功的最大分配是六个左右不同因素中的最小值,而几乎所有这些因素都可能发生几乎不可预测的变化.要知道多大的分配可行,唯一现实的方法就是尝试分配你需要的大小,看看是否成功. [1] 这里我以Windows为例,因为问题涉及Windows.虽然机制和名称各不相同,但基本思想远非Windows独有;大多数其他操作系统也提供类似的功能. (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容! |