Go语言渐入佳境[48]-GBK编码转换为UTF-8编码

在Go语言中,有时抓取到的中文网页使用的是GBK编码,这时需要将其转换为UTF-8编码。
为此需要下载两个辅助包:

1
2
> gopm get -g -v golang.org/x/text
> gopm get -g -v golang.org/x/net/html

下面用一个简单的爬虫,演示如何将GBK编码的网页内容转换为UTF-8:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
package main

import (
"net/http"
"io/ioutil"
"fmt"
"io"
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"bufio"
"golang.org/x/text/transform"
)

// main fetches the chinanews.com home page, detects the character encoding of
// the response body, and prints the page decoded to UTF-8 on standard output.
//
// It panics if the request fails or the body cannot be read. On a non-200
// status it prints the status code and returns without printing the body.
func main() {
	resp, err := http.Get("http://www.chinanews.com/")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Println("Error: status code", resp.StatusCode)
		// Do not decode and dump an error page.
		return
	}

	// Sniff and read through one shared buffered reader. Detection peeks at
	// the start of the stream; if it were done on a separate buffer over
	// resp.Body, the peeked bytes would be drained from resp.Body and the
	// beginning of the page would be missing from the output.
	// bufio.NewReader returns an existing *bufio.Reader unchanged, so
	// determineEncoding peeks into this very buffer without consuming it.
	body := bufio.NewReader(resp.Body)
	e := determineEncoding(body)
	utf8reader := transform.NewReader(body, e.NewDecoder())

	all, err := ioutil.ReadAll(utf8reader)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", all)
}
// determineEncoding guesses the character encoding of the data in r from its
// first 1024 bytes, or from all of it when the input is shorter than that.
//
// The bytes are inspected with Peek on a bufio.Reader. When r is already a
// *bufio.Reader with a large enough buffer, bufio.NewReader returns r itself,
// so nothing is consumed and the caller can keep reading the full stream
// from r. For any other reader the inspected data is drained from r into a
// temporary buffer and is no longer available from r; callers that need the
// whole stream afterwards should therefore pass a *bufio.Reader.
//
// It panics if reading from r fails with anything other than io.EOF.
func determineEncoding(r io.Reader) encoding.Encoding {
	head, err := bufio.NewReader(r).Peek(1024)
	// For inputs shorter than 1024 bytes Peek returns io.EOF together with
	// the bytes that were available; those are still enough to sniff from.
	if err != nil && err != io.EOF {
		panic(err)
	}
	// Only the encoding is needed; the canonical name and the "certain"
	// flag returned by DetermineEncoding are intentionally discarded.
	e, _, _ := charset.DetermineEncoding(head, "")
	return e
}