C++实现文件内查找字符串

发布时间 2024-01-13 21:46:07作者: Computer_Tech

实现概要:

  • 将文件内容分块读入 buf 后,先查找与目标字符串首字符匹配的位置,然后用 seekg() 移动文件指针、用 peek() 逐个查看后续字符是否也匹配
  • 如果后续字符全部匹配,就把该字符串在文件中的位置 push 进一个 vector<int>,然后继续查找文件的剩余内容
// Find every occurrence of a string in a file (PeekInFile / FindInFile) and copy the file from each match onward (main).
#include <iostream>
#include <fstream>
#include <string>
#include <cstring>
#include <malloc.h>
#include <windows.h>
#include <memory>
#include <iomanip>
//由于peek函数只能在当前文件指针下查看下一个字符, 应该自定义一个能移动到指定位置后查看下一个字符的peek.
#include <vector>
using namespace std;
// Buffer size, in bytes, used for chunked file reads and copies in this file.
int bufvolum = 512;
/**
 * Reads up to `peekNum` bytes starting at absolute offset `pos` of `file`
 * without disturbing the caller's view of the stream.
 *
 * Unlike istream::peek(), which only looks at the byte under the current read
 * position, this looks at an arbitrary position. The stream's read position
 * and its state flags (eofbit/failbit/badbit) are restored to what they were
 * on entry, so a caller that is in the middle of a read loop can keep relying
 * on `file.fail()` / `file.eof()` afterwards.
 *
 * @param file     Input file stream; if it is not open an empty string is returned.
 * @param pos      Absolute offset (from the beginning of the file) to read from.
 * @param peekNum  Maximum number of bytes to read.
 * @return The bytes actually read. The result is shorter than `peekNum` (possibly
 *         empty) when the requested range runs past the end of the file or the
 *         seek fails; it never contains padding for bytes that do not exist.
 *
 * Prints "seekg error!" to stdout when seeking to `pos` fails.
 */
auto PeekInFile(std::ifstream& file, unsigned pos, unsigned peekNum)
{
    std::string peeked;
    if (!file.is_open() || peekNum == 0)
    {
        return peeked;
    }

    // A previous read that hit EOF leaves failbit set, and then tellg()/seekg()
    // refuse to work. Remember the flags, clear them for the duration of the
    // peek, and put them back at the end.
    const std::ios_base::iostate savedState = file.rdstate();
    file.clear();
    const auto originPtr = file.tellg();

    file.seekg(static_cast<std::streamoff>(pos));
    if (file.fail())
    {
        std::cout << "seekg error!" << std::endl;
    }
    else
    {
        // One bulk read instead of a seek + peek per byte.
        peeked.resize(peekNum);
        file.read(&peeked[0], static_cast<std::streamsize>(peekNum));
        // Keep only what really came from the file (short read at EOF).
        peeked.resize(static_cast<std::size_t>(file.gcount()));
    }

    // Reading up to EOF sets eofbit/failbit; clear so the restoring seek works.
    file.clear();
    if (originPtr != std::ifstream::pos_type(-1))
    {
        file.seekg(originPtr);
    }
    // Hand the stream back with exactly the flags it had on entry.
    file.clear(savedState);
    return peeked;
}

/**
 * Finds every occurrence of `substr` in `file`, starting the search at
 * absolute offset `pos`.
 *
 * The file is read in chunks of `chunkSize` bytes. The last
 * `substr.size() - 1` bytes of each chunk are carried over to the next one,
 * so occurrences that straddle a chunk boundary are found as well.
 * Overlapping occurrences are all reported.
 *
 * @param file       Input file stream. Open it in binary mode if the returned
 *                   offsets are going to be passed to seekg(); in text mode the
 *                   number of characters read need not match file offsets.
 * @param substr     Byte sequence to look for. An empty string matches nothing.
 * @param pos        Absolute offset at which the search starts (default 0).
 * @param chunkSize  Number of bytes read per iteration (default 512); values
 *                   <= 0 fall back to 512. Does not affect the result.
 * @return Absolute file offsets of all matches, in ascending order. Empty when
 *         the file is not open, `substr` is empty, `pos` is negative or the
 *         initial seek fails. Offsets are stored as `int`, so files larger
 *         than INT_MAX bytes are not supported.
 *
 * On return the stream's error flags are cleared and its read position is
 * wherever the search stopped (normally the end of the file).
 */
std::vector<int> FindInFile(std::ifstream& file, std::string substr, int pos = 0, int chunkSize = 512)
{
    std::vector<int> arrIndex;
    if (!file.is_open() || substr.empty() || pos < 0)
    {
        return arrIndex;
    }
    if (chunkSize <= 0)
    {
        chunkSize = 512;
    }

    // Start from a clean state; a stream left at EOF by an earlier read would
    // otherwise refuse to seek.
    file.clear();
    file.seekg(pos);
    if (file.fail())
    {
        file.clear();
        return arrIndex;
    }

    // Number of trailing bytes that could be the beginning of a match which
    // completes in the next chunk.
    const std::size_t carry = substr.size() - 1;

    std::string chunk(static_cast<std::size_t>(chunkSize), '\0');
    std::string window;                 // carried tail + freshly read chunk
    window.reserve(carry + chunk.size());
    long long windowStart = pos;        // file offset of window[0]

    while (file)
    {
        file.read(&chunk[0], static_cast<std::streamsize>(chunk.size()));
        const std::streamsize got = file.gcount();
        if (got <= 0)
        {
            break;
        }
        window.append(chunk.data(), static_cast<std::size_t>(got));

        // Advance by one after each hit so overlapping matches are reported too.
        for (std::size_t hit = window.find(substr);
             hit != std::string::npos;
             hit = window.find(substr, hit + 1))
        {
            arrIndex.push_back(static_cast<int>(windowStart + static_cast<long long>(hit)));
        }

        // Drop everything except the last `carry` bytes. A complete match needs
        // carry + 1 bytes, so nothing already reported can be found again.
        if (window.size() > carry)
        {
            const std::size_t drop = window.size() - carry;
            window.erase(0, drop);
            windowStart += static_cast<long long>(drop);
        }
    }

    // Reaching EOF sets eofbit/failbit; clear them so the caller can keep
    // seeking and reading from the same stream.
    file.clear();
    return arrIndex;
}

int main()
{

    SetConsoleOutputCP(65001);
    ifstream fxml("C:\\Users\\34625\\Downloads\\cnblogs_blog_ComputerTech.20240111164025\\cnblogs.xml", ios_base::in); // create fis and associate with jamjar.txt
    if (!fxml.is_open())
    {
        cout << "\n open error!\n";
    }
    vector indexArr(move(FindInFile(fxml, "[TOC]"))) ;
    ofstream fwrite("xxxxx.xml");
    if (!fwrite.is_open())
    {
        fxml.close();
        return 0;
    }

    string buf;
    buf.reserve(bufvolum);
    for (auto i = indexArr.begin(); i < indexArr.end() ; ++i)
    {
        int need2write {0};
        if (i + 1 == indexArr.end())
        {
            auto originPos = fxml.tellg();
            fxml.seekg(0, ios_base::end);
            auto filesize = fxml.tellg();
            fxml.seekg(originPos);
            need2write = filesize - *i;
        }
        else
        {
            need2write = *(i + 1) - *i ;
        }

        int residual = need2write % bufvolum;
        unsigned time = need2write / bufvolum ;

        fxml.seekg(*i);
        if (!fxml.is_open() || fxml.fail() || fxml.bad())
        {
            cout << endl << "fxml error" << endl;
            system("pause");
        }

        while (time--)
        {
            if (fxml.fail() || fxml.bad())
            {
                cout << "failllllllllllllll" << endl;
                system("pause");
                break;
            }
            fxml.get(buf.data(), bufvolum + 1, EOF);
            int getNum = fxml.gcount();
            buf.append(buf.data());
            fwrite.write(buf.data(), fxml.gcount());//写入fwrite
            cout << buf.data();//
            buf.clear();
        }
        if (residual)
        {
            buf.clear();
            fxml.get(buf.data(), residual + 1, EOF );

            buf.append(buf.data());
            fwrite.write(buf.data(), residual);
            cout << buf.c_str();
        }
    }
    fwrite.close();
    fxml.close();
    system("pause");
    return 0;
}