強火で進め

このブログではプログラム関連の記事を中心に書いてます。

Unity で UTF-16 の文字列を UTF-8 と UTF-32 に変換する C# のコード

UTF-16 の文字列を UTF-8 と UTF-32 に変換するプログラムを検証したので、折角なのでメモっておく。

	/// <summary>
	/// Encodes a sample string made of surrogate pairs as UTF-16, converts those
	/// bytes to UTF-8 and UTF-32, logs each byte sequence in hex, and writes the
	/// UTF-8 and UTF-32 results to "utf8.txt" and "utf32.txt".
	/// </summary>
	void Start () {
		// Width in bytes of one UTF-32 code unit.
		int codeUnitSize = sizeof(UInt32);

		// Alternative samples that can be swapped in:
		//   "\uD83D\uDC36"  U+1F436 (Dog Face)
		//   "\uD83D\uDEA1"  U+1F6A1 (Aerial Tramway)
		// Current sample: U+1F6A3 (Rowboat) followed by U+1F3FB (skin tone modifier).
		string sample = "\uD83D\uDEA3\uD83C\uDFFB";
		byte[] utf16Bytes = Encoding.Unicode.GetBytes(sample);

		// Hand-built, space-separated hex dump of the UTF-16 bytes.
		var hexDump = new StringBuilder("byte[]:");
		foreach (byte b in utf16Bytes) {
			hexDump.Append(' ').Append(b.ToString("X2"));
		}
		Debug.Log (hexDump.ToString());

		// Same bytes again, this time in BitConverter's dash-separated format.
		Debug.Log ("unicodeBytes: "+BitConverter.ToString(utf16Bytes));

		byte[] utf8Bytes = Encoding.Convert (Encoding.Unicode, Encoding.UTF8, utf16Bytes);
		byte[] utf32Bytes = Encoding.Convert (Encoding.Unicode, Encoding.UTF32, utf16Bytes);
		Debug.Log ("utf8Bytes: "+BitConverter.ToString(utf8Bytes));
		Debug.Log ("utf32Bytes: "+BitConverter.ToString(utf32Bytes));

		// Reinterpret every complete 4-byte group of the UTF-32 buffer as a UInt32.
		var codePoints = new StringBuilder("ToUInt32:");
		for (int offset = 0; offset + codeUnitSize <= utf32Bytes.Length; offset += codeUnitSize) {
			uint codePoint = BitConverter.ToUInt32 (utf32Bytes, offset);
			codePoints.Append(' ').Append(codePoint.ToString ("X4"));
		}
		Debug.Log (codePoints.ToString());

		// UTF-8 output: the converted bytes only, nothing is prepended.
		using (var utf8File = new FileStream("utf8.txt", FileMode.Create, FileAccess.Write))
		{
			utf8File.Write(utf8Bytes, 0, utf8Bytes.Length);
		}

		// UTF-32 output: preamble (BOM) first, then the code units.
		using (var utf32File = new FileStream("utf32.txt", FileMode.Create, FileAccess.Write))
		{
			byte[] bom = new UTF32Encoding().GetPreamble ();
			utf32File.Write(bom, 0, bom.Length);

			// Copy one code unit at a time and stop at the first all-zero unit.
			int pos = 0;
			while (pos + codeUnitSize <= utf32Bytes.Length
				&& BitConverter.ToUInt32(utf32Bytes, pos) != 0) {
				utf32File.Write(utf32Bytes, pos, codeUnitSize);
				pos += codeUnitSize;
			}
		}
	}