C标准库中的strtok

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
 
// Portable aliases for the re-entrant tokenizer and for string duplication.
// MSVC spells them strtok_s / _strdup; POSIX spells them strtok_r / strdup.
// _WIN32 is the macro the compiler itself predefines on Windows targets;
// WIN32 is only defined by project settings or <windows.h>, so both are accepted.
#if defined(_WIN32) || defined(WIN32)
#define EU_STRTOK strtok_s
#define EU_STRDUP _strdup
#else
#define EU_STRTOK strtok_r
#define EU_STRDUP strdup
#endif
 
// Demonstrates nested tokenisation with the re-entrant strtok variant.
// The outer pass splits `str` on ',' into names; the inner pass splits each
// name on ' ' into words. Each level keeps its own save pointer, so the two
// passes do not disturb each other.
//
// pNames holds heap copies of every name (made before the inner pass writes
// terminators into it); pWords holds pointers directly into `str`.
// Returns 0 always.
int main()
{
	enum { kMaxNames = 100, kMaxWords = 100 };

	char str[1000] = "Steven Jobs, Bruce Lee, Jack Chen, George W Bush";
	char* pNames[kMaxNames];
	int idxNames = 0;
	char* pWords[kMaxWords];
	int idxWords = 0;
	char* buf = str;
	char* outPtr = NULL;
	int i;

	// A NULL save pointer marks the first call, which must receive the string
	// itself; every later call passes NULL to continue from the saved position.
	// The capacity checks come first so no token is consumed when a table is full.
	while (idxNames < kMaxNames
		&& NULL != (buf = EU_STRTOK(NULL == outPtr? buf : NULL, ",", &outPtr)))
	{
		char* innerPtr = NULL;
		char* nameCopy = EU_STRDUP(buf);
		if (NULL == nameCopy)
		{
			break; // out of memory: stop instead of storing a NULL name
		}
		pNames[idxNames++] = nameCopy;
		while (idxWords < kMaxWords
			&& NULL != (buf = EU_STRTOK(NULL == innerPtr? buf : NULL, " ", &innerPtr)))
		{
			pWords[idxWords++] = buf;
		}
	}
 
	// Set a breakpoint on the loop below to watch pNames and pWords contents
	// in debug mode, before the duplicated names are released.
	for (i = 0; i < idxNames; ++i)
	{
		free(pNames[i]);
	}
	return 0;
}

strtok 有一个缺陷:它会跳过开头、结尾以及连续出现的分隔符,因此永远不会返回空 token。比如对于源串 ",hello,the,world!",以 "," 作为分隔符时,分隔结果中会少了开头的空串 ""。

std版字符串token提取

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <iostream>
#include <locale>
#include <sstream>
 
// A wide-character ctype facet for comma-separated input: whenever the
// queried mask includes `space`, ',' is reported as matching and ' ' as not
// matching. Every other query is answered by std::ctype<wchar_t>.
struct csv_whitespace : std::ctype<wchar_t>
{
	bool do_is(mask m, char_type c) const override
	{
		if (m & space) {
			switch (c) {
			case L' ':
				return false; // a blank is NOT whitespace for this facet
			case L',':
				return true;  // a comma IS whitespace for this facet
			default:
				break;
			}
		}
		// Anything else keeps the base-class classification.
		return ctype::do_is(m, c);
	}
};
 
int main()
{
	std::wstring in = L"Column 1,  Column 2, Column 3\n123,456,789";
	std::wstring token;
 
	std::wcout << "default locale:\n";
	std::wistringstream s1(in);
	while (s1 >> token) {
		std::wcout << ">" << token << '\n';
	}
 
	std::wcout << "locale with modified ctype:\n";
	std::wistringstream s2(in);
	csv_whitespace* my_ws = new csv_whitespace;
	s2.imbue(std::locale(s2.getloc(), my_ws));
	while (s2 >> token) {
		std::wcout << ">" << token << '\n';
	}
	return 0;
}

std版ASCII(窄)字符转宽字符

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#include <iostream>
#include <locale>
 
// Widens a narrow string to wchar_t with the ctype<wchar_t> facet of the
// current global locale: first a single character, then the whole phrase.
int main()
{
	const char narrow_phrase[] = "Seventy-seven foxes";
	// Same element count as the narrow array, terminator included.
	wchar_t wide_phrase[sizeof(narrow_phrase)];

	const std::locale loc;
	const std::ctype<wchar_t>& widener = std::use_facet<std::ctype<wchar_t>>(loc);

	const wchar_t first = widener.widen(narrow_phrase[0]);
	std::wcout << L"The first wide character is: " << first << std::endl;

	std::wcout << L"The wide-character phrase is: ";
	// The range covers the trailing '\0', so wide_phrase ends up terminated.
	const char* const phrase_end = narrow_phrase + sizeof(narrow_phrase);
	widener.widen(narrow_phrase, phrase_end, wide_phrase);
	std::wcout << wide_phrase << std::endl;

	return 0;
}

std版日期字符串解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#include <iostream>
#include <sstream>
#include <locale>
#include <iomanip>
 
// Parses a date/time string containing a German month name with
// std::get_time and prints the result with std::put_time("%c"), or
// "Parse failed" if the extraction fails.
int main()
{
	std::tm t = {};
	std::istringstream ss("2011-Februar-18 23:12:34");
	// Locale names are platform specific: "de-DE" below is the Windows-style
	// spelling; the commented-out line shows the POSIX-style alternative.
	//ss.imbue(std::locale("de_DE.utf-8"));
	try {
		ss.imbue(std::locale("de-DE"));
	}
	// Catch by const reference: catching std::exception by value slices the
	// thrown object, so what() may no longer report the real error message.
	catch (const std::exception& e) {
		// The locale is unavailable; report it and continue with the
		// stream's existing locale.
		std::cout << e.what() << std::endl;
	}
	ss >> std::get_time(&t, "%Y-%b-%d %H:%M:%S");
	if (ss.fail()) {
		std::cout << "Parse failed\n";
	}
	else {
		std::cout << std::put_time(&t, "%c") << '\n';
	}
	return 0;
}

References:
https://en.cppreference.com/w/cpp/locale/locale/name
https://social.msdn.microsoft.com/Forums/en-US/0fb287af-bb58-4c60-a3da-14fe84c16948/stdlocaleglobalstdlocalequotzhcnquot-gets-quotbad-locale-namequot?forum=vcgeneral
https://docs.microsoft.com/en-us/cpp/c-runtime-library/locale-names-languages-and-country-region-strings?redirectedfrom=MSDN&view=msvc-160