How do I convert Go's []uint8 type to uint32?
Just code:
package main
import (
"fmt"
)
func main() {
uInt8 := []uint8{0,1,2,3}
var uInt32 uint32
uInt32 = uint32(uInt8)
fmt.Printf("%v to %v\n", uInt8, uInt32)
}
~>6g test.go && 6l -o test test.6 && ./test
test.go:10: cannot convert uInt8 (type []uint8) to type uint32
package main
import (
"encoding/binary"
"fmt"
)
func main() {
u8 := []uint8{0, 1, 2, 3}
u32LE := binary.LittleEndian.Uint32(u8)
fmt.Println("little-endian:", u8, "to", u32LE)
u32BE := binary.BigEndian.Uint32(u8)
fmt.Println("big-endian: ", u8, "to", u32BE)
}
Output:
little-endian: [0 1 2 3] to 50462976
big-endian: [0 1 2 3] to 66051
The Go binary package functions are implemented as a series of shifts.
func (littleEndian) Uint32(b []byte) uint32 {
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func (bigEndian) Uint32(b []byte) uint32 {
return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
}
Are you trying to do the following?
t := []int{1, 2, 3, 4}
s := make([]interface{}, len(t))
for i, v := range t {
s[i] = v
}
I am trying to perform a circular shift in Swift. The code I currently have uses a string containing the bits of the number I want to shift circularly. Here is the code I have so far:
func circularRightShift(_ input: UInt8, _ amount: UInt8) -> UInt8 {
guard amount > 0 else { return input }
var a = String(UInt("\(input)")!, radix: 2)
if a.count != 8 {
a = "".padding(toLength: 8-a.count, withPad: "0", startingAt: 0) + a
}
for _ in 1...amount {
a.insert(a.last!, at: a.startIndex)
a.removeLast()
}
return UInt8(a, radix: 2)!
}
This code works properly, but it's a bit slow. Is there any better way to achieve this, possibly without using a string? Thanks in advance.
You can achieve this with two bit shift operators and a bitwise OR:
func circularRightShift(_ input: UInt8, _ amount: UInt8) -> UInt8 {
let amount = amount % 8 // Reduce to the range 0...7
return (input >> amount) | (input << (8 - amount))
}
Example (amount=5):
abcdefgh <- bits of input
00000abc <- bits of input >> amount
defgh000 <- bits of input << (8 - amount)
defghabc <- bits of result
Another, more general answer is this:
func circularShift<ShiftType: BinaryInteger>(_ first: ShiftType, by shiftAmount: Int) -> ShiftType {
(first << shiftAmount) | (first >> (first.bitWidth - shiftAmount))
}
You can also create an extension:
extension BinaryInteger {
func circularShifted(by shiftAmount: Int) -> Self {
(self << shiftAmount) | (self >> (self.bitWidth - shiftAmount))
}
}
To be even more flexible, you can take the sign of the operation into account and differentiate between left and right shifting:
extension BinaryInteger where Self: UnsignedInteger {
func rotateLeft(by shiftAmount: Int) -> Self {
if shiftAmount < 0 {
return rotateRight(by: -shiftAmount)
}
return (self << shiftAmount) | (self >> (self.bitWidth - shiftAmount))
}
func rotateRight(by shiftAmount: Int) -> Self {
if shiftAmount < 0 {
return rotateLeft(by: -shiftAmount)
}
return (self >> shiftAmount) | (self << (self.bitWidth - shiftAmount))
}
}
which can then be used like
let x: UInt8 = 0x8E
String(x.rotateLeft(by: 2), radix: 16) // 3A
String(x.rotateRight(by: 2), radix: 16) // A3
One approach is to shift your number as 16-bits, and then combine the two bytes with each other, like this:
func circular(n: UInt8, k: UInt8) -> UInt8 {
let s = UInt16(n) << (k & 0x07)
return UInt8(s & 0xFF) | UInt8(s >> 8)
}
I'm querying an instance of PostgreSQL and selecting a sum of a decimal value:
db=# SELECT SUM(distance) AS total_distance FROM table_name WHERE deleted_at IS NULL;
total_distance
-----------------------
3808.0666666666666578
(1 row)
When I try to execute this query in Rust:
extern crate postgres;
use postgres::{Connection, TlsMode};
fn main() {
let conn = Connection::connect("postgresql://u:p#localhost:5432/db", TlsMode::None).unwrap();
let query = "SELECT SUM(distance) AS total_distance FROM table_name WHERE deleted_at IS NULL;";
for row in &conn.query(query, &[]).unwrap() {
let total_distance: f64 = row.get("total_distance");
println!("{}", total_distance);
}
}
Results in:
thread 'main' panicked at 'error retrieving column "total_distance": Error(Conversion(WrongType(Type(Numeric))))'
I've seen in various threads that the Numeric type isn't supported by the Postgres crate, so I've tried creating my own numeric type:
#[derive(Debug)]
struct Float64(f64);
impl FromSql for Float64 {
fn from_sql(ty: &Type, raw: &[u8]) -> Result<Float64, Box<Error + Sync + Send>> {
let bytes = raw.try_into().expect("failed!");
Ok(Float64(f64::from_be_bytes(bytes)))
}
fn from_sql_null(ty: &Type) -> Result<Float64, Box<Error + Sync + Send>> {
Ok(Float64(0.0))
}
fn from_sql_nullable(
ty: &Type,
raw: Option<&[u8]>,
) -> Result<Float64, Box<Error + Sync + Send>> {
match raw {
None => Ok(Float64(0.0)),
Some(value) => Float64::from_sql(ty, value),
}
}
fn accepts(ty: &Type) -> bool {
NUMERIC.eq(ty)
}
}
impl Display for Float64 {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
But this still doesn't work as the raw bytes fail to unwrap:
thread 'main' panicked at 'failed!: TryFromSliceError(())', src/libcore/result.rs:1165:5
raw: &[u8] has a length of 18, which is why it can't be unwrapped. What would be the best way to convert an 18-byte slice to f64?
I need to implement an algorithm to check if an input is valid by calculating a modulo of a String.
The code in Kotlin:
private val facteurs = arrayOf(7, 3, 1)
private fun modulo(s: String): Int {
var result = 0
var i = -1
var idx = 0
for (c in s.toUpperCase()) {
val value:Int
if (c == '<') {
value = 0
} else if (c in "0123456789") {
value = c - '0'
} else if (c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
value = c.toInt() - 55
} else {
throw IllegalArgumentException("Unexpected character: $c at position $idx")
}
i += 1
result += value * facteurs[i % 3]
idx += 1
}
return result % 10
}
This implies doing math operations on the characters.
Is there an elegant way to do this in Swift 3 and 4?
I tried some cumbersome constructs like this:
value = Int(c.unicodeScalars) - Int("0".first!.unicodeScalars)
But it does not even compile.
I'm currently using Swift 4 with Xcode 9, but a Swift 3 answer is welcome too.
You can enumerate the unicodeScalars view of a string together
with the running index, use switch/case pattern matching,
and access the numeric .value of the unicode scalar:
func modulo(_ s: String) -> Int? {
let facteurs = [7, 3, 1]
var result = 0
for (idx, uc) in s.uppercased().unicodeScalars.enumerated() {
let value: UInt32
switch uc {
case "<":
value = 0
case "0"..."9":
value = uc.value - UnicodeScalar("0").value
case "A"..."Z":
value = uc.value - UnicodeScalar("A").value + 10
default:
return nil
}
result += Int(value) * facteurs[idx % facteurs.count]
}
return result % 10
}
This compiles with both Swift 3 and 4. Of course you could also
throw an error instead of returning nil for invalid input.
Note that "<", "0", "9" etc.
in the switch statement are inferred from the context as UnicodeScalar,
not as String or Character, therefore "0"..."9" (in this context)
is a ClosedRange<UnicodeScalar> and uc can be matched against
that range.
Something like this works for me:
"A".utf16.first! + 2 //comes out to 67
Careful with the forced unwrap "!"
If you need the scalars value you can do
"A".unicodeScalars.first!.value + 2
More reading can be done on this here in the SPL.
For the c Character type value you could do this:
String(c).unicodeScalars.first!.value + 2
Here is an attempt to mod the function:
func modulo(s: String) -> Int? {
var result = 0
var factors = [7,3,1]
for (i, c) in s.uppercased().characters.enumerated() {
let char = String(c)
var val: Int
if char == "<" {
val = 0
} else if "0123456789".contains(char) {
val = Int(char.unicodeScalars.first!.value - "0".unicodeScalars.first!.value)
} else if "ABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(char) {
val = Int(char.unicodeScalars.first!.value - 55)
} else {
return nil
}
result += val * factors[(i) % 3]
}
return result % 10
}
This is in Swift 3; in Swift 4 I believe you can just iterate over the string without converting to Characters.
I'm trying to send an ICMP message whose TTL is just 1, and I expect to receive a Time Exceeded message. That message does come (I see it in Wireshark), but my program blocks on syscall.Recvfrom. Does anyone know why?
icmp.go
package main
import (
"bytes"
"encoding/binary"
"fmt"
"net"
"os"
"syscall"
)
type ICMP struct {
Type uint8
Code uint8
Checksum uint16
Identifier uint16
SeqNo uint16
}
func Checksum(data []byte) uint16 {
var (
sum uint32
length int = len(data)
index int
)
for length > 1 {
sum += uint32(data[index])<<8 + uint32(data[index+1])
index += 2
length -= 2
}
if length > 0 {
sum += uint32(data[index])
}
sum += (sum >> 16)
return uint16(^sum)
}
func main() {
h := Header{
Version: 4,
Len: 20,
TotalLen: 20 + 8,
TTL: 1,
Protocol: 1,
// Dst:
}
argc := len(os.Args)
if argc < 2 {
fmt.Println("usage: program + host")
return
}
ipAddr, _ := net.ResolveIPAddr("ip", os.Args[1])
h.Dst = ipAddr.IP
icmpReq := ICMP{
Type: 8,
Code: 0,
Identifier: 0,
SeqNo: 0,
}
out, err := h.Marshal()
if err != nil {
fmt.Println("ip header error", err)
return
}
var icmpBuf bytes.Buffer
binary.Write(&icmpBuf, binary.BigEndian, icmpReq)
icmpReq.Checksum = Checksum(icmpBuf.Bytes())
icmpBuf.Reset()
binary.Write(&icmpBuf, binary.BigEndian, icmpReq)
fd, _ := syscall.Socket(syscall.AF_INET, syscall.SOCK_RAW, syscall.IPPROTO_RAW)
addr := syscall.SockaddrInet4{
Port: 0,
}
copy(addr.Addr[:], ipAddr.IP[12:16])
pkg := append(out, icmpBuf.Bytes()...)
fmt.Println("ip length", len(pkg))
if err := syscall.Sendto(fd, pkg, 0, &addr); err != nil {
fmt.Println("Sendto err:", err)
}
var recvBuf []byte
if nBytes, rAddr, err := syscall.Recvfrom(fd, recvBuf, 0); err == nil {
fmt.Printf("recv %d bytes from %v\n", nBytes, rAddr)
}
}
Additionally, I use header.go and helper.go from https://github.com/golang/net/tree/master/ipv4.
As Andy pointed out, the raw(7) man page says:
An IPPROTO_RAW socket is send only. If you really want to receive
all IP packets, use a packet(7) socket with the ETH_P_IP protocol.
Note that packet sockets don't reassemble IP fragments, unlike raw
sockets.
I know I can receive an ICMP reply if I set IPPROTO_ICMP as the protocol when I create the socket, but I need to set the TTL to 1, which must be done at the IP layer. Therefore I send the ICMP request on an IPPROTO_RAW socket, and after that I use net.ListenIP to receive ICMP messages. Here is the code:
package main
import (
"bytes"
"encoding/binary"
"log"
"net"
"os"
"syscall"
)
const icmpID uint16 = 43565 // use a magic number for now
type ICMP struct {
Type uint8
Code uint8
Checksum uint16
Identifier uint16
SeqNo uint16
}
func Checksum(data []byte) uint16 {
var (
sum uint32
length int = len(data)
index int
)
for length > 1 {
sum += uint32(data[index])<<8 + uint32(data[index+1])
index += 2
length -= 2
}
if length > 0 {
sum += uint32(data[index])
}
sum += (sum >> 16)
return uint16(^sum)
}
func main() {
h := Header{
Version: 4,
Len: 20,
TotalLen: 20 + 8,
TTL: 1,
Protocol: 1,
}
argc := len(os.Args)
if argc < 2 {
log.Println("usage: program + host")
return
}
ipAddr, _ := net.ResolveIPAddr("ip", os.Args[1])
h.Dst = ipAddr.IP
icmpReq := ICMP{
Type: 8,
Code: 0,
Identifier: icmpID,
SeqNo: 1,
}
out, err := h.Marshal()
if err != nil {
log.Println("ip header error", err)
return
}
var icmpBuf bytes.Buffer
binary.Write(&icmpBuf, binary.BigEndian, icmpReq)
icmpReq.Checksum = Checksum(icmpBuf.Bytes())
icmpBuf.Reset()
binary.Write(&icmpBuf, binary.BigEndian, icmpReq)
fd, _ := syscall.Socket(syscall.AF_INET, syscall.SOCK_RAW, syscall.IPPROTO_RAW)
addr := syscall.SockaddrInet4{
Port: 0,
}
copy(addr.Addr[:], ipAddr.IP[12:16])
pkg := append(out, icmpBuf.Bytes()...)
if err := syscall.Sendto(fd, pkg, 0, &addr); err != nil {
log.Println("Sendto err:", err)
}
laddr, err := net.ResolveIPAddr("ip4:icmp", "0.0.0.0")
if err != nil {
log.Fatal(err)
}
c, err := net.ListenIP("ip4:icmp", laddr)
if err != nil {
log.Fatal(err)
}
for {
buf := make([]byte, 2048)
n, raddr, err := c.ReadFrom(buf)
if err != nil {
log.Println(err)
continue
}
icmpType := buf[0]
if icmpType == 11 {
if n == 36 { // Time Exceeded message
// A Time Exceeded message carries an 8-byte ICMP header, the original IP header (20 bytes),
// and the first 64 bits (8 bytes) of the original payload, so the echo Identifier is at bytes 32-33.
id := binary.BigEndian.Uint16(buf[32:34])
log.Println("recv id", id)
if id == icmpID {
log.Println("recv Time Exceeded from", raddr)
}
}
}
}
}
Actually, I am writing a traceroute in Go; if anyone is interested, the whole code is on GitHub.
I think you need to give IPPROTO_ICMP as the protocol when you create your socket. The raw(7) man page says that an IPPROTO_RAW socket is send only. Also, if you use IPPROTO_ICMP, you don't give the IP header. (Note: I haven't actually tried this in Go.)
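To illustrate that suggestion, here is a minimal, untested sketch (assuming Linux and root privileges; the echo message and error handling are simplified): the socket is created with IPPROTO_ICMP, the TTL is set through the IP-level socket option instead of a hand-built IP header, and only the ICMP bytes are passed to Sendto.
package main

import (
	"log"
	"net"
	"os"
	"syscall"
)

func main() {
	if len(os.Args) < 2 {
		log.Fatal("usage: program host")
	}
	ipAddr, err := net.ResolveIPAddr("ip4", os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	// ICMP echo request: type 8, code 0, id 0, seq 0.
	// The checksum of these eight bytes is 0xF7FF; real code should compute it as in the question.
	msg := []byte{8, 0, 0xF7, 0xFF, 0, 0, 0, 0}
	// A raw ICMP socket: the kernel builds the IP header for us.
	fd, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_RAW, syscall.IPPROTO_ICMP)
	if err != nil {
		log.Fatal(err)
	}
	// The TTL is still an IP-layer property, but it can be set with a socket option.
	if err := syscall.SetsockoptInt(fd, syscall.IPPROTO_IP, syscall.IP_TTL, 1); err != nil {
		log.Fatal(err)
	}
	var addr syscall.SockaddrInet4
	copy(addr.Addr[:], ipAddr.IP.To4())
	if err := syscall.Sendto(fd, msg, 0, &addr); err != nil {
		log.Fatal(err)
	}
	// Unlike IPPROTO_RAW, this socket can also receive; the reply buffer includes the IP header.
	buf := make([]byte, 1500)
	n, from, err := syscall.Recvfrom(fd, buf, 0)
	if err != nil {
		log.Fatal(err)
	}
	log.Println("received", n, "bytes from", from)
}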
I can read the file into a byte array, but when I convert it to a string it treats the UTF-16 bytes as ASCII. How do I convert it correctly?
package main
import ("fmt"
"os"
"bufio"
)
func main(){
// read whole the file
f, err := os.Open("test.txt")
if err != nil {
fmt.Printf("error opening file: %v\n",err)
os.Exit(1)
}
r := bufio.NewReader(f)
var s,b,e = r.ReadLine()
if e==nil{
fmt.Println(b)
fmt.Println(s)
fmt.Println(string(s))
}
}
output:
false
[255 254 91 0 83 0 99 0 114 0 105 0 112 0 116 0 32 0 73 0 110 0 102 0 111 0 93 0
13 0]
S c r i p t I n f o ]
Update:
After testing the two examples, I now understand what the exact problem is.
On Windows, if I add a line break (CR+LF) at the end of the line, the CR is read into the line, because the ReadLine function cannot handle Unicode correctly ([0D 0A] is handled, [0D 00 0A 00] is not).
If the ReadLine function recognized Unicode, it would understand [0D 00 0A 00] and return []uint16 rather than []byte.
So I think I should not use bufio.NewReader, as it cannot read UTF-16, and I don't see that bufio.Reader.ReadLine accepts a parameter to indicate whether the text being read is UTF-8, UTF-16LE/BE, or UTF-32. Is there any ReadLine function for Unicode text in the Go library?
The latest version of golang.org/x/text/encoding/unicode makes it easier to do this because it includes unicode.BOMOverride, which will intelligently interpret the BOM.
Here is ReadFileUTF16(), which is like ioutil.ReadFile() but decodes UTF-16.
package main
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"strings"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// Similar to ioutil.ReadFile() but decodes UTF-16. Useful when
// reading data from MS-Windows systems that generate UTF-16BE files,
// but will do the right thing if other BOMs are found.
func ReadFileUTF16(filename string) ([]byte, error) {
// Read the file into a []byte:
raw, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
// Make a transformer that converts the MS-Win default encoding to UTF-8:
win16be := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
// Make a transformer that is like win16be, but abides by BOM:
utf16bom := unicode.BOMOverride(win16be.NewDecoder())
// Make a Reader that uses utf16bom:
unicodeReader := transform.NewReader(bytes.NewReader(raw), utf16bom)
// decode and print:
decoded, err := ioutil.ReadAll(unicodeReader)
return decoded, err
}
func main() {
data, err := ReadFileUTF16("inputfile.txt")
if err != nil {
log.Fatal(err)
}
final := strings.Replace(string(data), "\r\n", "\n", -1)
fmt.Println(final)
}
Here is NewScannerUTF16 which is like os.Open() but returns a scanner.
package main
import (
"bufio"
"fmt"
"log"
"os"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
type utfScanner interface {
Read(p []byte) (n int, err error)
}
// Creates a scanner similar to os.Open() but decodes the file as UTF-16.
// Useful when reading data from MS-Windows systems that generate UTF-16BE
// files, but will do the right thing if other BOMs are found.
func NewScannerUTF16(filename string) (utfScanner, error) {
// Read the file into a []byte:
file, err := os.Open(filename)
if err != nil {
return nil, err
}
// Make a transformer that converts the MS-Win default encoding to UTF-8:
win16be := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
// Make a transformer that is like win16be, but abides by BOM:
utf16bom := unicode.BOMOverride(win16be.NewDecoder())
// Make a Reader that uses utf16bom:
unicodeReader := transform.NewReader(file, utf16bom)
return unicodeReader, nil
}
func main() {
s, err := NewScannerUTF16("inputfile.txt")
if err != nil {
log.Fatal(err)
}
scanner := bufio.NewScanner(s)
for scanner.Scan() {
fmt.Println(scanner.Text()) // Println will add back the final '\n'
}
if err := scanner.Err(); err != nil {
fmt.Fprintln(os.Stderr, "reading inputfile:", err)
}
}
FYI: I have put these functions into an open source module and have made further improvements. See https://github.com/TomOnTime/utfutil/
UTF16, UTF8, and Byte Order Marks are defined by the Unicode Consortium: UTF-16 FAQ, UTF-8 FAQ, and Byte Order Mark (BOM) FAQ.
Issue 4802: bufio: reading lines is too cumbersome
Reading lines from a file is too cumbersome in Go.
People are often drawn to bufio.Reader.ReadLine because of its name,
but it has a weird signature, returning (line []byte, isPrefix bool,
err error), and requires a lot of work.
ReadSlice and ReadString require a delimiter byte, which is almost
always the obvious and unsightly '\n', and also can return both a line
and an EOF
Revision: f685026a2d38
bufio: new Scanner interface
Add a new, simple interface for scanning (probably textual) data,
based on a new type called Scanner. It does its own internal
buffering, so should be plausibly efficient even without injecting a
bufio.Reader. The format of the input is defined by a "split
function", by default splitting into lines.
go1.1beta1 released
You can download binary and source distributions from the usual place:
https://code.google.com/p/go/downloads/list?q=go1.1beta1
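For orientation, the default behaviour that revision describes (a Scanner that buffers internally and splits the input into lines) looks like this minimal sketch:
package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
)

func main() {
	// bufio.Scanner buffers internally; the default split function returns one line per Scan.
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		fmt.Println(scanner.Text()) // the trailing `\r?\n` has been stripped
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}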
Here's a program which uses the Unicode rules to convert UTF16 text file lines to Go UTF8 encoded strings. The code has been revised to take advantage of the new bufio.Scanner interface in Go 1.1.
package main
import (
"bufio"
"bytes"
"encoding/binary"
"fmt"
"os"
"runtime"
"unicode/utf16"
"unicode/utf8"
)
// UTF16BytesToString converts UTF-16 encoded bytes, in big or little endian byte order,
// to a UTF-8 encoded string.
func UTF16BytesToString(b []byte, o binary.ByteOrder) string {
utf := make([]uint16, (len(b)+(2-1))/2)
for i := 0; i+(2-1) < len(b); i += 2 {
utf[i/2] = o.Uint16(b[i:])
}
if len(b)/2 < len(utf) {
utf[len(utf)-1] = utf8.RuneError
}
return string(utf16.Decode(utf))
}
// UTF-16 endian byte order
const (
unknownEndian = iota
bigEndian
littleEndian
)
// dropCREndian drops a terminal \r from the endian data.
func dropCREndian(data []byte, t1, t2 byte) []byte {
if len(data) > 1 {
if data[len(data)-2] == t1 && data[len(data)-1] == t2 {
return data[0 : len(data)-2]
}
}
return data
}
// dropCRBE drops a terminal \r from the big endian data.
func dropCRBE(data []byte) []byte {
return dropCREndian(data, '\x00', '\r')
}
// dropCRLE drops a terminal \r from the little endian data.
func dropCRLE(data []byte) []byte {
return dropCREndian(data, '\r', '\x00')
}
// dropCR drops a terminal \r from the data.
func dropCR(data []byte) ([]byte, int) {
var endian = unknownEndian
switch ld := len(data); {
case ld != len(dropCRLE(data)):
endian = littleEndian
case ld != len(dropCRBE(data)):
endian = bigEndian
}
return data, endian
}
// SplitFunc is a split function for a Scanner that returns each line of
// text, stripped of any trailing end-of-line marker. The returned line may
// be empty. The end-of-line marker is one optional carriage return followed
// by one mandatory newline. In regular expression notation, it is `\r?\n`.
// The last non-empty line of input will be returned even if it has no
// newline.
func ScanUTF16LinesFunc(byteOrder binary.ByteOrder) (bufio.SplitFunc, func() binary.ByteOrder) {
// Function closure variables
var endian = unknownEndian
switch byteOrder {
case binary.BigEndian:
endian = bigEndian
case binary.LittleEndian:
endian = littleEndian
}
const bom = 0xFEFF
var checkBOM bool = endian == unknownEndian
// Scanner split function
splitFunc := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if checkBOM {
checkBOM = false
if len(data) > 1 {
switch uint16(bom) {
case uint16(data[0])<<8 | uint16(data[1]):
endian = bigEndian
return 2, nil, nil
case uint16(data[1])<<8 | uint16(data[0]):
endian = littleEndian
return 2, nil, nil
}
}
}
// Scan for newline-terminated lines.
i := 0
for {
j := bytes.IndexByte(data[i:], '\n')
if j < 0 {
break
}
i += j
switch e := i % 2; e {
case 1: // UTF-16BE
if endian != littleEndian {
if i > 1 {
if data[i-1] == '\x00' {
endian = bigEndian
// We have a full newline-terminated line.
return i + 1, dropCRBE(data[0 : i-1]), nil
}
}
}
case 0: // UTF-16LE
if endian != bigEndian {
if i+1 < len(data) {
i++
if data[i] == '\x00' {
endian = littleEndian
// We have a full newline-terminated line.
return i + 1, dropCRLE(data[0 : i-1]), nil
}
}
}
}
i++
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
// drop CR.
advance = len(data)
switch endian {
case bigEndian:
data = dropCRBE(data)
case littleEndian:
data = dropCRLE(data)
default:
data, endian = dropCR(data)
}
if endian == unknownEndian {
if runtime.GOOS == "windows" {
endian = littleEndian
} else {
endian = bigEndian
}
}
return advance, data, nil
}
// Request more data.
return 0, nil, nil
}
// Endian byte order function
orderFunc := func() (byteOrder binary.ByteOrder) {
switch endian {
case bigEndian:
byteOrder = binary.BigEndian
case littleEndian:
byteOrder = binary.LittleEndian
}
return byteOrder
}
return splitFunc, orderFunc
}
func main() {
file, err := os.Open("utf16.le.txt")
if err != nil {
fmt.Println(err)
os.Exit(1)
}
defer file.Close()
fmt.Println(file.Name())
rdr := bufio.NewReader(file)
scanner := bufio.NewScanner(rdr)
var bo binary.ByteOrder // unknown, infer from data
// bo = binary.LittleEndian // windows
splitFunc, orderFunc := ScanUTF16LinesFunc(bo)
scanner.Split(splitFunc)
for scanner.Scan() {
b := scanner.Bytes()
s := UTF16BytesToString(b, orderFunc())
fmt.Println(len(s), s)
fmt.Println(len(b), b)
}
fmt.Println(orderFunc())
if err := scanner.Err(); err != nil {
fmt.Println(err)
}
}
Output:
utf16.le.txt
15 "Hello, 世界"
22 [34 0 72 0 101 0 108 0 108 0 111 0 44 0 32 0 22 78 76 117 34 0]
0
0 []
15 "Hello, 世界"
22 [34 0 72 0 101 0 108 0 108 0 111 0 44 0 32 0 22 78 76 117 34 0]
LittleEndian
utf16.be.txt
15 "Hello, 世界"
22 [0 34 0 72 0 101 0 108 0 108 0 111 0 44 0 32 78 22 117 76 0 34]
0
0 []
15 "Hello, 世界"
22 [0 34 0 72 0 101 0 108 0 108 0 111 0 44 0 32 78 22 117 76 0 34]
BigEndian
Here is the simplest way to read it:
package main
import (
"bufio"
"fmt"
"log"
"os"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
func main() {
file, err := os.Open("./text.txt")
if err != nil {
log.Fatal(err)
}
scanner := bufio.NewScanner(transform.NewReader(file, unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()))
for scanner.Scan() {
fmt.Println(scanner.Text())
}
}
Since Windows uses little-endian byte order by default, we use the unicode.UseBOM policy to pick up the BOM from the text, with unicode.LittleEndian as the fallback.
For example:
package main
import (
"errors"
"fmt"
"log"
"unicode/utf16"
)
func utf16toString(b []uint8) (string, error) {
if len(b)&1 != 0 {
return "", errors.New("len(b) must be even")
}
// Check BOM
var bom int
if len(b) >= 2 {
switch n := int(b[0])<<8 | int(b[1]); n {
case 0xfffe:
bom = 1
fallthrough
case 0xfeff:
b = b[2:]
}
}
w := make([]uint16, len(b)/2)
for i := range w {
w[i] = uint16(b[2*i+bom&1])<<8 | uint16(b[2*i+(bom+1)&1])
}
return string(utf16.Decode(w)), nil
}
func main() {
// Simulated data from e.g. a file
b := []byte{255, 254, 91, 0, 83, 0, 99, 0, 114, 0, 105, 0, 112, 0, 116, 0, 32, 0, 73, 0, 110, 0, 102, 0, 111, 0, 93, 0, 13, 0}
s, err := utf16toString(b)
if err != nil {
log.Fatal(err)
}
fmt.Printf("%q", s)
}
Output:
"[Script Info]\r"
If you want anything to print as a string, you could use fmt.Sprint:
package main
import (
"bufio"
"fmt"
"os"
)
func main() {
// read whole the file
f, err := os.Open("test.txt")
if err != nil {
fmt.Printf("error opening file: %v\n", err)
return
}
r := bufio.NewReader(f)
var s, _, e = r.ReadLine()
if e != nil {
fmt.Println(e)
return
}
fmt.Println(fmt.Sprint(string(s)))
}