本文整理汇总了Python中tensorflow.python.ops.string_ops.unicode_transcode函数的典型用法代码示例。如果您正苦于以下问题：Python unicode_transcode函数的具体用法？Python unicode_transcode怎么用？Python unicode_transcode使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了unicode_transcode函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_transcode_utf8_simple
def test_transcode_utf8_simple(self):
    """Check that ASCII-only strings are unchanged when transcoded to UTF-8.

    The same 2x2 batch of byte strings is transcoded to UTF-8 from each of
    UTF-8, ISO-8859-1 and US-ASCII, and every result is asserted equal to
    the input batch.
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]

    # The session object itself is never referenced; results are fetched
    # through self.evaluate(), so no `as sess` binding is needed.
    with self.cached_session():
        for input_encoding in ("UTF-8", "ISO-8859-1", "US-ASCII"):
            outputs = string_ops.unicode_transcode(
                strings,
                input_encoding=input_encoding,
                output_encoding="UTF-8",
                errors="replace",
                replacement_char=ord(" "),
                replace_control_characters=False)
            values = self.evaluate(outputs)
            self.assertAllEqual(values, strings)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:33,代码来源:unicode_transcode_op_test.py
示例2: test_invalid_encoding_causes_errors
def test_invalid_encoding_causes_errors(self):
    """Check how an unknown encoding name is reported for input and output.

    * An unknown ``input_encoding`` is expected to fail when the op is
      evaluated, with an op error naming the bad encoding.
    * An unknown ``output_encoding`` is expected to raise ``ValueError``
      somewhere between building and evaluating the op.
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]

    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding="invalid",
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=False)
        with self.assertRaisesOpError(
                "Could not create converter for input encoding: invalid"):
            self.evaluate(outputs)

    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists in Python 3.12+.
    with self.assertRaisesRegex(ValueError, "Op passed string 'invalid'"):
        with self.cached_session():
            outputs = string_ops.unicode_transcode(
                strings,
                input_encoding="UTF-8",
                output_encoding="invalid",
                errors="replace",
                replacement_char=ord(" "),
                replace_control_characters=False)
            self.evaluate(outputs)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:25,代码来源:unicode_transcode_op_test.py
示例3: test_cjk_encodings
def test_cjk_encodings(self):
    """Transcode Shift-JIS, GB18030, Big5 and EUC-KR samples to UTF-8.

    Expected values are produced with Python's own codecs for the same
    encodings, so the op's output is compared against Python's decoding.
    """
    strings_ja = [
        b"\x5c\x5c",  # Yen sign
        b"\x8f\x70",  # kanji character "waza"
        b"\x83\x4f",  # katakana character "gu"
    ]
    strings_zh_cn = [b"\xca\xf5"]  # simplified "shu4"
    strings_zh_tw = [b"\xb3\x4e"]  # traditional "shu4"
    strings_ko = [b"\xc7\xd1\xb9\xce"]  # hangul "hanmin"

    expected_ja = [s.decode("shift_jis").encode("UTF-8") for s in strings_ja]
    expected_zh_cn = [
        s.decode("gb18030").encode("UTF-8") for s in strings_zh_cn
    ]
    expected_zh_tw = [s.decode("big5").encode("UTF-8") for s in strings_zh_tw]
    expected_ko = [s.decode("euc_kr").encode("UTF-8") for s in strings_ko]

    with self.cached_session():
        outputs_ja = string_ops.unicode_transcode(
            strings_ja,
            input_encoding="shift_jis",
            output_encoding="UTF-8",
            replacement_char=ord(" "),
            replace_control_characters=False)
        outputs_zh_cn = string_ops.unicode_transcode(
            strings_zh_cn,
            input_encoding="gb18030",
            output_encoding="UTF-8",
            replacement_char=ord(" "),
            replace_control_characters=False)
        outputs_zh_tw = string_ops.unicode_transcode(
            strings_zh_tw,
            input_encoding="big5",
            output_encoding="UTF-8",
            replacement_char=ord(" "),
            replace_control_characters=False)
        outputs_ko = string_ops.unicode_transcode(
            strings_ko,
            input_encoding="euc_kr",
            output_encoding="UTF-8",
            replacement_char=ord(" "),
            replace_control_characters=False)

        # Fetch all four results in one call, via self.evaluate() like the
        # neighbouring tests rather than calling the session directly.
        result_ja, result_zh_cn, result_zh_tw, result_ko = self.evaluate(
            [outputs_ja, outputs_zh_cn, outputs_zh_tw, outputs_ko])

    self.assertAllEqual(result_ja, expected_ja)
    self.assertAllEqual(result_zh_cn, expected_zh_cn)
    self.assertAllEqual(result_zh_tw, expected_zh_tw)
    self.assertAllEqual(result_ko, expected_ko)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:53,代码来源:unicode_transcode_op_test.py
示例4: test_transcode_utf8_with_bom
def test_transcode_utf8_with_bom(self):
    """Check that a leading UTF-8 BOM is carried through transcoding.

    The BOM-prefixed input is transcoded UTF-8 -> UTF-8 (expected to be
    unchanged) and UTF-8 -> UTF-16-BE (expected to match Python's own
    decode/encode of the same bytes).
    """
    bom_string = b"\xef\xbb\xbfabcdefg"

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bom_string, input_encoding="UTF-8", output_encoding="UTF-8")
        values = self.evaluate(outputs)
        self.assertAllEqual(values, bom_string)  # BOM preserved

        outputs = string_ops.unicode_transcode(
            bom_string, input_encoding="UTF-8", output_encoding="UTF-16-BE")
        values = self.evaluate(outputs)
        utf16expected = bom_string.decode("UTF-8").encode("UTF-16-BE")
        self.assertAllEqual(values, utf16expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:13,代码来源:unicode_transcode_op_test.py
示例5: test_transcode_utf8_with_replacement_char
def test_transcode_utf8_with_replacement_char(self):
    """Check that a U+FFFD already present in valid input is left alone.

    The input contains the bytes EF BF BD (the UTF-8 form of U+FFFD). It is
    expected to pass through unchanged both with ``errors="strict"`` and with
    ``errors="replace"`` using ``"?"`` as the replacement character.
    """
    strings = [b"a\xef\xbf\xbd"]

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="strict")
        values = self.evaluate(outputs)
        self.assertAllEqual(values, [b"a\xef\xbf\xbd"])

        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord("?"))
        values = self.evaluate(outputs)
        self.assertAllEqual(values, [b"a\xef\xbf\xbd"])
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:14,代码来源:unicode_transcode_op_test.py
示例6: test_transcode_bad_utf8_termination_with_defaults
def test_transcode_bad_utf8_termination_with_defaults(self):
    """Check default handling of input that ends in a stray 0xF0 byte.

    With no error-handling arguments given, the trailing byte is expected
    to come out as the bytes EF BF BD (U+FFFD).
    """
    bad_string = b"a\xf0"

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
        values = self.evaluate(outputs)
        self.assertAllEqual(values, b"a\xef\xbf\xbd")  # 0xFFFD
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:7,代码来源:unicode_transcode_op_test.py
示例7: test_transcode_bad_utf8_with_defaults
def test_transcode_bad_utf8_with_defaults(self):
    """Check default handling of a NUL byte followed by an invalid 0xFF.

    With no error-handling arguments given, the NUL is expected to be kept
    and the 0xFF byte to come out as the bytes EF BF BD.
    """
    bad_string = b"\x00\xff"

    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
        # Use self.evaluate() like the neighbouring tests instead of
        # calling the session object directly.
        values = self.evaluate(outputs)
        self.assertAllEqual(values, b"\x00\xef\xbf\xbd")
开发者ID:abhinav-upadhyay,项目名称:tensorflow,代码行数:7,代码来源:unicode_transcode_op_test.py
示例8: test_transcode_bad_utf8_with_space_replacement
def test_transcode_bad_utf8_with_space_replacement(self):
    """Check that an invalid byte becomes the requested replacement char.

    With ``replacement_char=ord(" ")`` and otherwise default arguments, the
    NUL byte is expected to be kept and the 0xFF byte to become a space.
    """
    bad_string = b"\x00\xff"

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            replacement_char=ord(" "))
        values = self.evaluate(outputs)
        self.assertAllEqual(values, b"\x00 ")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:8,代码来源:unicode_transcode_op_test.py
示例9: test_transcode_bad_utf8_with_elision_of_malformatting
def test_transcode_bad_utf8_with_elision_of_malformatting(self):
    """Check that ``errors="ignore"`` drops an invalid byte.

    The NUL byte is expected to be kept while the 0xFF byte disappears
    from the output.
    """
    bad_string = b"\x00\xff"

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="ignore")
        values = self.evaluate(outputs)
        self.assertAllEqual(values, b"\x00")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:10,代码来源:unicode_transcode_op_test.py
示例10: test_transcode_bad_utf8_with_elision_including_control_chars
def test_transcode_bad_utf8_with_elision_including_control_chars(self):
    """Check ``errors="ignore"`` together with control-character replacement.

    With ``replace_control_characters=True`` both the NUL byte and the
    invalid 0xFF byte are expected to vanish, leaving an empty string.
    """
    bad_string = b"\x00\xff"

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="ignore",
            replace_control_characters=True)
        values = self.evaluate(outputs)
        self.assertAllEqual(values, b"")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py
示例11: test_transcode_bad_utf8_start_with_strict_errors
def test_transcode_bad_utf8_start_with_strict_errors(self):
    """Check that ``errors="strict"`` rejects input starting with 0xFF.

    Evaluating the op is expected to raise an op error whose message
    matches "Invalid formatting on input string".
    """
    bad_string = b"\xffabcd"

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="strict")
        with self.assertRaisesOpError(
                "Invalid formatting on input string"):
            self.evaluate(outputs)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py
示例12: test_transcode_utf16_le_be_with_bom
def test_transcode_utf16_le_be_with_bom(self):
    """Check BOM handling for UTF-16 input read as big- and little-endian.

    Three cases are asserted:
      1. BE-encoded input read as UTF-16-BE: BOM and 'a' come out in UTF-8.
      2. The same BE bytes read as UTF-16-LE: the byte-swapped code units
         are transcoded as-is, giving a mangled result.
      3. LE-encoded input read as UTF-16-LE: BOM and 'a' come out in UTF-8.
    """
    be_bom_string = b"\xfe\xff\x00\x61"  # Big-endian BOM with 'a' encoded
    le_bom_string = b"\xff\xfe\x61\x00"  # Little-endian BOM with 'a' encoded

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            be_bom_string,
            input_encoding="UTF-16-BE",
            output_encoding="UTF-8")
        values = self.evaluate(outputs)
        # BOM is preserved in output
        self.assertAllEqual(values, b"\xef\xbb\xbfa")

        outputs = string_ops.unicode_transcode(
            be_bom_string,
            input_encoding="UTF-16-LE",
            output_encoding="UTF-8")
        values = self.evaluate(outputs)
        # mangled BOM and value from (incorrect) LE encoding
        self.assertAllEqual(values, b"\xef\xbf\xbe\xe6\x84\x80")

        outputs = string_ops.unicode_transcode(
            le_bom_string,
            input_encoding="UTF-16-LE",
            output_encoding="UTF-8")
        values = self.evaluate(outputs)
        self.assertAllEqual(values, b"\xef\xbb\xbfa")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:20,代码来源:unicode_transcode_op_test.py
示例13: test_transcode_ascii_with_shift_chars
def test_transcode_ascii_with_shift_chars(self):
    """Check that 0x0E / 0x0F bytes pass through US-ASCII -> UTF-8 unchanged.

    Control-character replacement is disabled, and the output is asserted
    equal to the input.
    """
    strings = [b"\x0e\x0e", b"\x0f\x0f"]

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding="US-ASCII",
            output_encoding="UTF-8",
            replacement_char=ord(" "),
            replace_control_characters=False)
        values = self.evaluate(outputs)
        self.assertAllEqual(values, strings)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py
示例14: test_transcode_bad_utf8_with_some_good
def test_transcode_bad_utf8_with_some_good(self):
    """Check that one bad byte between valid text is the only thing replaced.

    With ``errors="replace"`` and a space as the replacement character, the
    0xFF byte is expected to become a single space while the surrounding
    ASCII text is unchanged.
    """
    bad_string = b"abc\xffabcdefg"

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=False)
        values = self.evaluate(outputs)
        self.assertAllEqual(values, b"abc abcdefg")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:12,代码来源:unicode_transcode_op_test.py
示例15: test_transcode_utf8_to_utf32
def test_transcode_utf8_to_utf32(self):
    """Check UTF-8 -> UTF-32-BE transcoding against Python's own codecs.

    The inputs include a three-byte and a four-byte UTF-8 sequence; the
    expected output is each input decoded as UTF-8 and re-encoded as
    UTF-32-BE by Python.
    """
    strings = [b"ab\xe2\x82\xac", b"\xf0\x90\x90\xb7"]
    expected = [s.decode("UTF-8").encode("UTF-32-BE") for s in strings]

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="UTF-32-BE",
            replacement_char=ord(" "),
            replace_control_characters=False)
        values = self.evaluate(outputs)
        self.assertAllEqual(values, expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:12,代码来源:unicode_transcode_op_test.py
示例16: test_forwarding
def test_forwarding(self):
    """Transcode the output of another op rather than a plain constant.

    The input is the first five bytes of each string, produced by
    ``string_ops.substr``; a UTF-8 -> UTF-8 transcode is asserted to return
    those prefixes unchanged.
    """
    with self.cached_session():
        # Generate an input that is uniquely consumed by the transcode op.
        # This exercises code paths which are optimized for this case
        # (e.g., using forwarding).
        prefixes = string_ops.substr(
            constant_op.constant([b"AbCdEfG", b"HiJkLmN"], dtypes.string),
            pos=0,
            len=5)
        result = string_ops.unicode_transcode(
            prefixes,
            input_encoding="UTF-8",
            output_encoding="UTF-8")

        self.assertAllEqual([b"AbCdE", b"HiJkL"], result)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:13,代码来源:unicode_transcode_op_test.py
示例17: test_transcode_bad_utf8
def test_transcode_bad_utf8(self):
    """Check ``errors="replace"`` with and without control-char replacement.

    The input is a NUL byte followed by an invalid 0xFF byte, and the
    replacement character is a space:
      * ``replace_control_characters=True``: both bytes are expected to
        become spaces (two spaces in total).
      * ``replace_control_characters=False``: the NUL is expected to be
        kept and only the 0xFF byte becomes a space.
    """
    bad_string = b"\x00\xff"

    # NOTE: the first expected value is two spaces, one per replaced input
    # byte. A single space would contradict the second case, where 0xFF
    # alone already yields one space.
    cases = (
        (True, b"  "),
        (False, b"\x00 "),
    )

    with self.cached_session():
        for replace_control_characters, expected in cases:
            outputs = string_ops.unicode_transcode(
                bad_string,
                input_encoding="UTF-8",
                output_encoding="UTF-8",
                errors="replace",
                replacement_char=ord(" "),
                replace_control_characters=replace_control_characters)
            # Use self.evaluate() like the neighbouring tests instead of
            # calling the session object directly.
            values = self.evaluate(outputs)
            self.assertAllEqual(values, expected)
开发者ID:abhinav-upadhyay,项目名称:tensorflow,代码行数:22,代码来源:unicode_transcode_op_test.py
示例18: test_transcode_utf16_to_utf8
def test_transcode_utf16_to_utf8(self):
    """Check UTF-16 -> UTF-8 transcoding against Python's own codecs.

    The inputs carry no BOM and are given ``input_encoding="UTF-16"``; the
    expected values decode them as UTF-16-BE, so the test relies on the op
    reading BOM-less "UTF-16" input as big-endian.
    """
    strings = [
        b"\x00a\x00b\x20\xAC",
        b"\xD8\x01\xDC\x37",  # U+10437
    ]
    expected = [s.decode("UTF-16-BE").encode("UTF-8") for s in strings]

    # The session handle is never used directly, so it is not bound.
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-16",
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=False)
        values = self.evaluate(outputs)
        self.assertAllEqual(values, expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:14,代码来源:unicode_transcode_op_test.py
示例19: test_invalid_error_policy_causes_errors
def test_invalid_error_policy_causes_errors(self):
    """Check that an unknown ``errors`` policy is rejected with ValueError.

    The message is expected to list the accepted values
    ("strict", "replace", "ignore").
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]

    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists in Python 3.12+.
    with self.assertRaisesRegex(
            ValueError,
            "'invalid' not in: \"strict\", \"replace\", \"ignore\"."):
        with self.cached_session():
            outputs = string_ops.unicode_transcode(
                strings,
                input_encoding="UTF-8",
                output_encoding="UTF-8",
                errors="invalid",
                replacement_char=ord(" "),
                replace_control_characters=False)
            self.evaluate(outputs)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:14,代码来源:unicode_transcode_op_test.py
示例20: test_bom_handling
def test_bom_handling(self, string, input_encoding, expected):
    """Transcode `string` to UTF-8 and compare the result with `expected`.

    Args:
      string: Input byte string passed to the transcode op.
      input_encoding: Encoding name the op should read `string` as.
      expected: Value the UTF-8 output must equal.

    NOTE(review): the extra parameters are presumably supplied by a
    parameterized-test decorator that is not visible here; confirm.
    """
    # cached_session() and self.evaluate() replace the deprecated
    # test_session() and Tensor.eval(), matching the other tests here.
    with self.cached_session():
        output = string_ops.unicode_transcode(
            string, input_encoding=input_encoding, output_encoding="UTF-8")
        self.assertAllEqual(self.evaluate(output), expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:5,代码来源:unicode_transcode_op_test.py
注：本文中的tensorflow.python.ops.string_ops.unicode_transcode函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论