cpp: read and write utf-8 text file

发布时间 2023-04-22 20:58:37作者: ®Geovin Du Dream Park™

 

/*****************************************************************//**
 * \file   geovindu.h
 * \brief  业务操作方法
 *
 * \author geovindu,Geovin Du
 * \date   2023-04-22
***********************************************************************/
/**
 * Reference: MSVC /utf-8 option (set source and executable character sets to UTF-8):
 * https://learn.microsoft.com/zh-cn/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8?view=msvc-170
 */


#pragma once

#define _UNICODE

#ifndef GEOVINDU_H
#define GEOVINDU_H

#include <iostream>
#include <windows.h>
#include<string>
#include<string.h>
#include<fstream>
#include<stdio.h>
#include<cstdlib>
#include<cstring>
#include<iomanip>
#include <iostream>
#include <windows.h>


namespace geovindu
{

	/// <summary>
	/// Stateless helper class exposing a single operation that writes a
	/// text file encoded as UTF-8.
	/// </summary>
	class Geovin
	{
	public:
		// Declarations below are currently disabled in this header:
		//string to_utf8(const wchar_t* buffer, int len);
		//string to_utf8(const wstring& str);
		//void createFile(wstring& str);

		/// <summary>
		/// Writes text out as a UTF-8 text file.
		/// </summary>
		void createFile();
	};

}


#endif
#define UNICODE

  

#define _UNICODE

#include <iostream>
#include <windows.h>
#include<string>
#include<string.h>
#include<fstream>
#include<stdio.h>
#include<cstdlib>
#include<cstring>
#include<iomanip>
#include "geovindu.h"


using namespace std;

namespace geovindu
{

	/// <summary>
	/// 
	/// </summary>
	/// <param name="buffer"></param>
	/// <param name="len"></param>
	/// <returns></returns>
	string to_utf8(const wchar_t* buffer, int len)
	{
		int nChars = ::WideCharToMultiByte(
			CP_UTF8,
			0,
			buffer,
			len,
			NULL,
			0,
			NULL,
			NULL);
		if (nChars == 0) return "";
		string newbuffer;
		newbuffer.resize(nChars);
		::WideCharToMultiByte(
			CP_UTF8,
			0,
			buffer,
			len,
			const_cast<char*>(newbuffer.c_str()),
			nChars,
			NULL,
			NULL);

		return newbuffer;
	}
	/// <summary>
	/// Convenience overload: converts a wide string to UTF-8 by forwarding its
	/// character data and length to the buffer-based to_utf8 overload.
	/// </summary>
	/// <param name="str">Wide string to convert.</param>
	/// <returns>Whatever the buffer-based overload returns for the string's contents.</returns>
	string to_utf8(const wstring& str)
	{
		const wchar_t* const chars = str.c_str();
		const int count = static_cast<int>(str.size());
		return to_utf8(chars, count);
	}
	/// <summary>
	/// 
	/// </summary>
	void createFile(wstring& strchinese)
	{

		ofstream testFile;

		testFile.open("demoinput.txt", std::ios::out | std::ios::binary);

		//std::wstring text = strchinese;			

		std::string outtext = to_utf8(strchinese);

		testFile << outtext;

		testFile.close();

	}
	/// <summary>
	/// Writes a fixed multilingual sample text to "geovindudemo.txt" as UTF-8.
	/// The wide-string literal is converted with to_utf8 and written in
	/// binary mode. Reports an error on std::cerr and returns if the file
	/// cannot be opened.
	/// </summary>
	void Geovin::createFile()
	{
		// Scoped stream: closed automatically when the function returns.
		std::ofstream testFile("geovindudemo.txt", std::ios::out | std::ios::binary);
		if (!testFile.is_open())
		{
			std::cerr << "Fail to open the file: geovindudemo.txt" << std::endl;
			return;
		}

		const std::wstring text =
			L"涂聚文,你好,世界欢迎你!동생은 점수를 많이 땄어요\t geovindu\n Geovin Du \nНематериальное наследие водной рифмы\n"
			L"奇松・怪石・雲海と温泉\t大黄河を望む炳霊寺、驚異の張掖丹霞とシルクロードの要所9日間\n"
			L"Tours más solicitados\tParaíso en la Tierra - 13 Días\n"
			L"Entdecken Sie die schönsten Reiseziele von China mit unseren empfohlenen Touren.\n"
			L"Explorez les destinations les plus étonnantes de la Chine avec les visites recommandées.\n"
			L"\n";

		const std::string outtext = to_utf8(text);
		testFile << outtext;
	}

};

#define UNICODE

  

// ConsoleTextFileDemoApp.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//geovindu Geovin Du
#define _UNICODE
#define _CRT_SECURE_NO_WARNINGS


#include <iostream>
#include <windows.h>
#include <string>
#include <string.h>
#include <fstream>
#include <stdio.h>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <cstdio>
#include <codecvt>
#include <assert.h>
#include <windows.h>
#include <iostream>
#include <fstream>
#include <io.h>
#include <vector>

#include "ConvertEncode.h"
#include "geovindu.h"
#include "FileHelper.h"


using namespace std;
using namespace geovindu;





/// <summary>
/// Converts the given wide string to UTF-8 with std::wstring_convert and
/// writes the bytes to two files: "geovinduinput.txt" (binary mode) and
/// "geovinduinput2.txt" (default text mode).
/// A file that cannot be opened is reported on std::cerr and skipped.
/// </summary>
/// <param name="strchinese">Wide text to write; it is only read, never modified.</param>
/// <remarks>
/// std::wstring_convert::to_bytes throws std::range_error when the input
/// cannot be converted; that exception propagates to the caller.
/// </remarks>
void createFile(std::wstring& strchinese)
{
	std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> convert;

	// Convert once and reuse the same bytes for both output files.
	const std::string utf8Text = convert.to_bytes(strchinese);

	{
		std::ofstream binaryFile("geovinduinput.txt", std::ios::out | std::ios::binary);
		if (binaryFile.is_open())
		{
			binaryFile << utf8Text;
		}
		else
		{
			std::cerr << "Fail to open the file: geovinduinput.txt" << std::endl;
		}
	}

	{
		// Second copy, written through a stream opened in the default (text) mode.
		std::ofstream textFile("geovinduinput2.txt");
		if (textFile.is_open())
		{
			textFile << utf8Text;
		}
		else
		{
			std::cerr << "Fail to open the file: geovinduinput2.txt" << std::endl;
		}
	}
}
/// <summary>
/// Reads whitespace-separated records (name, student ID, English score,
/// math score, C++ score) from "geovinduinput.txt" and prints each record
/// to standard output, tab-separated. Reading stops at the first record
/// that cannot be parsed or at end of file.
/// </summary>
/// <remarks>
/// If the file cannot be opened, a message is printed and the process
/// terminates via exit(0).
/// NOTE(review): that is a success exit status on a failure path - confirm
/// whether callers rely on it.
/// </remarks>
void readfile()
{
	ConvertEncode encode;
	char sname[50];          // name field, raw bytes as stored in the file
	std::string stuID;       // student ID
	double english = 0.0;    // English score
	double math = 0.0;       // math score
	double cpp = 0.0;        // C++ score

	std::ifstream fin("geovinduinput.txt", std::ios::in); // the file is expected to be UTF-8
	if (!fin)
	{
		std::cout << "Fail to open the file!" << std::endl;
		exit(0);
	}

	// std::setw bounds the extraction into the fixed-size char array so an
	// over-long token cannot overrun sname. The width resets after every
	// extraction, so it is re-applied on each iteration.
	// Original author's note: Chinese text was not read out correctly here.
	while (fin >> std::setw(static_cast<int>(sizeof(sname))) >> sname >> stuID >> english >> math >> cpp)
	{
		// presumably UTF8ToGBDu converts the UTF-8 name for console display - confirm in ConvertEncode.h
		std::cout << encode.UTF8ToGBDu(sname) << "\t" << stuID << "\t" << english << "\t" << math << "\t" << cpp << std::endl;
	}
	// fin is closed by its destructor.
}


// Element count of the fixed-size read buffers; referenced only by the disabled sample code in main.
constexpr int FBLOCK_MAX_BYTES{ 256 };
/*
// File Type.
typedef enum FileType
{
	ANSI = 0,
	unicode,
	UTF8,
}FILETYPE;

FILETYPE GetTextFileType(const std::string& strFileName);

int UnicodeToANSI(char* pDes, const wchar_t* pSrc);
*/

// Program entry point: prints a greeting, dumps the records found in
// geovinduinput.txt (readfile), writes the sample UTF-8 file through
// Geovin::createFile, then prompts for one student record on the wide
// console streams and assembles it into a tab-separated wide string.
// Returns 0 after waiting on system("pause").
int main(void)
{
	std::cout << "Hello World! 涂聚文\n";

	/* unused code
			FileHelper helper;
			// file test.
			std::string strFileANSI = "studentANSI.txt";
			std::string strFileUNICODE = "student.txt";
			std::string strFileUTF8 = "geovindudemo.txt";

			// please change the file name to test.
			std::string strFileName = strFileUTF8;
			// the file type was not detected correctly
			TEXTFILETYPE fileType = helper.GetTextFileType(strFileName);

			if (TextFileType_UNICODE == fileType)
			{
				wchar_t szBuf[FBLOCK_MAX_BYTES];
				memset(szBuf, 0, sizeof(wchar_t) * FBLOCK_MAX_BYTES);

				std::string strMessage;

				FILE* fp = NULL;
				fp = fopen(strFileName.c_str(), "rb");
				if (fp != NULL)
				{
					// Unicode file should offset wchar_t bits(2 byte) from start.
					fseek(fp, sizeof(wchar_t), 0);
					while (fread(szBuf, sizeof(wchar_t), FBLOCK_MAX_BYTES, fp) > 0)
					{
						char szTemp[FBLOCK_MAX_BYTES] = { 0 };

						helper.UnicodeToANSI(szTemp, szBuf);
						strMessage += szTemp;
						memset(szBuf, 0, sizeof(wchar_t) * FBLOCK_MAX_BYTES);
					}
				}
				cout << "UNICODE" << endl;
				std::cout << strMessage << std::endl;

				fclose(fp);
			}
			else if (TextFileType_UTF8 == fileType)
			{
				char szBuf[FBLOCK_MAX_BYTES];
				memset(szBuf, 0, sizeof(char) * FBLOCK_MAX_BYTES);

				std::string strMessage;

				FILE* fp = NULL;
				fp = fopen(strFileName.c_str(), "rb");
				if (fp != NULL)
				{
					// UTF-8 file should offset 3 byte from start position.
					fseek(fp, sizeof(char) * 3, 0);
					while (fread(szBuf, sizeof(char), FBLOCK_MAX_BYTES, fp) > 0)
					{
						strMessage += szBuf;
						memset(szBuf, 0, sizeof(char) * FBLOCK_MAX_BYTES);
					}
				}
				cout << "utf-8" << endl;
				std::cout << strMessage << std::endl;

				fclose(fp);
			}
			else
			{
				char szBuf[FBLOCK_MAX_BYTES];
				memset(szBuf, 0, sizeof(char) * FBLOCK_MAX_BYTES);

				std::string strMessage;

				FILE* fp = NULL;
				fp = fopen(strFileName.c_str(), "rb");
				if (fp != NULL)
				{
					// common file do not offset.
					while (fread(szBuf, sizeof(char), FBLOCK_MAX_BYTES, fp) > 0)
					{
						strMessage += szBuf;
						memset(szBuf, 0, sizeof(char) * FBLOCK_MAX_BYTES);
					}
				}
				cout << "ANSI" << endl;
				std::cout << strMessage << std::endl;

				fclose(fp);


			}


	*/

	readfile();

	// Disabled alternative: read the file line by line and print it via wcout.
	//std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
	//std::ifstream ifs(L"geovinduinput.txt");
	//while (!ifs.eof())
	//{
	//	string line;
	//	getline(ifs, line);
	//	wstring wb = conv.from_bytes(line);
	//	wcout.imbue(locale("chs"));			// locale change is for console display only; Chinese shows, other languages do not
	//	wcout << wb << endl;
	//}
	//ifs.close();

	Geovin geovin;
	geovin.createFile();

	wstring allstr;          // tab-separated record assembled from the input
	wstring sname;           // student name
	wstring stuID;           // student ID
	// Zero-initialised so a failed or partial extraction below never leaves
	// an indeterminate value to be read by to_wstring.
	double english = 0.0;    // English score
	double math = 0.0;       // math score
	double cpp = 0.0;        // C++ score

	wcout << "请输入新增学生的信息" << endl;
	wcout << "姓名\t" << "学号\t" << "英语\t" << "数学\t" << "C++\t" << endl;
	wcin.imbue(locale("chs")); // read the input as Chinese text

	wcin >> sname >> stuID >> english >> math >> cpp;

	//allstr = sname + ' ' + stuID;
	allstr.append(sname);
	allstr.append(L"\t");
	allstr.append(stuID);
	allstr.append(L"\t");
	allstr.append(to_wstring(english));
	allstr.append(L"\t");
	allstr.append(to_wstring(math));
	allstr.append(L"\t");
	allstr.append(to_wstring(cpp));
	// createFile(allstr);

	system("pause");
	return 0;
}

// 运行程序: Ctrl + F5 或调试 >“开始执行(不调试)”菜单
// 调试程序: F5 或调试 >“开始调试”菜单

// 入门使用技巧: 
//   1. 使用解决方案资源管理器窗口添加/管理文件
//   2. 使用团队资源管理器窗口连接到源代码管理
//   3. 使用输出窗口查看生成输出和其他消息
//   4. 使用错误列表窗口查看错误
//   5. 转到“项目”>“添加新项”以创建新的代码文件,或转到“项目”>“添加现有项”以将现有代码文件添加到项目
//   6. 将来,若要再次打开此项目,请转到“文件”>“打开”>“项目”并选择 .sln 文件





#define UNICODE