Golang之http_range


秒传:记录MD5等,判断是否有相同文件已经上传。

整体流程图

image-20230318221818839

客户端首先使用HEAD请求获取文件的信息(如文件大小、是否支持HTTP Range等),然后启动指定数量的goroutine,分别请求服务端文件的不同range;全部请求结束之后,客户端按顺序合并各个分片。如果有校验码,可以校验传输过程是否有错误。

HEAD查看信息

// GetHeadInfo issues a HEAD request to url and reports the size of the
// resource in bytes, taken from the Content-Length response header.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status, when the server does not advertise "Accept-Ranges: bytes",
// or when Content-Length is missing or not a valid integer.
func GetHeadInfo(url string) (size int, err error) {
    resp, err := http.Head(url)
    if err != nil {
        return 0, err
    }
    // A HEAD response carries no payload, but the body must still be closed
    // so the underlying connection is released.
    defer resp.Body.Close()

    // Headers of an error page say nothing about the real resource.
    if resp.StatusCode < 200 || resp.StatusCode > 299 {
        return 0, errors.New("unexpected status: " + resp.Status)
    }

    if resp.Header.Get("Accept-Ranges") != "bytes" {
        return 0, errors.New("range not supported")
    }

    size, err = strconv.Atoi(resp.Header.Get("Content-Length"))
    if err != nil {
        return 0, err
    }
    return size, nil
}

测试:

// TestHead queries a live URL with GetHeadInfo, prints the returned error and
// size, and fails when the lookup reports an error or a non-positive size.
func TestHead(t *testing.T) {
    size, err := GetHeadInfo("http://orzlinux.cn/img/1676269431.png")
    fmt.Println(err)
    if err != nil {
        t.Fatalf("GetHeadInfo failed: %v", err)
    }
    fmt.Println(size)
    if size <= 0 {
        t.Fatalf("GetHeadInfo returned size %d, want a positive size", size)
    }
}
//<nil>
//103056

定义文件结构体

  • 文件大小
  • 分块数量(线程下载数量)
  • 对应的byte切片
  • 文件存储名
  • 文件存储路径
// DownFile describes one download that is fetched in several parts and then
// merged into a single file on disk.
type DownFile struct {
    Size      int        // total size of the remote file in bytes
    BlockSize int        // number of parts; Run starts one goroutine per part
    Data      []FilePart // one entry per part, indexed by part Id
    FileName  string     // name of the merged output file
    FileDir   string     // directory the output file is written to
    Url       string     // source URL that Run sends its range requests to
}

// FilePart is a single byte range of the file being downloaded.
type FilePart struct {
    Id   int    // index of the part within DownFile.Data
    From int    // first byte offset requested (inclusive)
    To   int    // last byte offset requested (inclusive)
    Data []byte // bytes received for this part
    Err  error  // error hit while downloading this part; nil on success
}

验证文件夹

// checkDir resolves fileDir to an absolute path and makes sure a directory
// exists there, creating it (and any missing parents) when necessary.
//
// A relative fileDir is resolved against the current working directory; an
// absolute fileDir is used as given. An error is returned if the working
// directory cannot be determined, if the directory cannot be created, or if
// the path exists but is not a directory.
func checkDir(fileDir string) (string, error) {
    if !filepath.IsAbs(fileDir) {
        abs, err := filepath.Abs(fileDir)
        if err != nil {
            return "", err
        }
        fileDir = abs
    }

    // MkdirAll does nothing for an existing directory and fails when the path
    // is occupied by a non-directory, so no separate Stat call is needed.
    if err := os.MkdirAll(fileDir, os.ModePerm); err != nil {
        return "", err
    }
    return fileDir, nil
}

测试:

// TestCheckDir verifies that checkDir creates a missing nested directory and
// returns a path that exists as a directory.
func TestCheckDir(t *testing.T) {
    // Work below a per-test temporary directory so the test runs on any
    // platform and leaves nothing behind.
    target := filepath.Join(t.TempDir(), "a", "b")

    dir, err := checkDir(target)
    if err != nil {
        t.Fatal(err)
    }
    info, err := os.Stat(dir)
    if err != nil {
        t.Fatal(err)
    }
    if !info.IsDir() {
        t.Fatalf("%s is not a directory", dir)
    }
    println(dir)
}

构建文件结构体

// NewDownloadFile prepares a DownFile for downloading url in blockSize parts
// into fileDir.
//
// fileDir is validated (and created if missing) by checkDir. fileName is the
// name of the output file; when it is empty the last path element of url is
// used instead. The URL is stored on the result because Run sends its range
// requests to f.Url. Size is left at 0 and must be set by the caller before
// Run is invoked.
//
// An error is returned when blockSize is not positive or when fileDir cannot
// be prepared.
func NewDownloadFile(url string, blockSize int, fileDir string, fileName string) (file *DownFile, err error) {
    // Reject bad part counts up front: make() panics on a negative length
    // and a zero count makes the per-part size computation divide by zero.
    if blockSize <= 0 {
        return nil, errors.New("block count must be positive")
    }

    dir, err := checkDir(fileDir)
    if err != nil {
        return nil, err
    }

    if fileName == "" {
        fileName = filepath.Base(url)
    }

    return &DownFile{
        Size:      0,
        BlockSize: blockSize,
        Data:      make([]FilePart, blockSize),
        FileName:  fileName,
        FileDir:   dir,
        Url:       url,
    }, nil
}

测试:

// TestNewDownloadFile builds a DownFile for a live URL, fills in the size
// reported by GetHeadInfo and prints the resulting struct.
func TestNewDownloadFile(t *testing.T) {
    url := "https://orzlinux.cn/img/1676269431.png"
    size, err := GetHeadInfo(url)
    if err != nil {
        t.Fatal(err)
    }
    file, err := NewDownloadFile(url, 2, "./ccc", "")
    if err != nil {
        // Stop here: file is nil on error and must not be dereferenced.
        t.Fatal(err)
    }
    file.Size = size
    fmt.Printf("%#v", file)
}

// http_range.DownFile{
//    Size:103056, 
//    BlockSize:2, 
//    Data:[]http_range.FilePart{
//        http_range.FilePart{Id:0, From:0, To:0, Data:[]uint8(nil)},
//        http_range.FilePart{Id:0, From:0, To:0, Data:[]uint8(nil)}},
//    FileName:"1676269431.png",
//    FileDir:"D:\\GoProjects\\kitex_learn\\http_range\\ccc"
//}

如何部分请求文件

image-20230319110753789

在请求Header里面加上Range: bytes={start}-{end}键值对(start和end都是从0开始、包含在内的字节偏移),服务端返回206 Partial Content。就算bytes的范围包含了文件全部字节,返回的status也是206。

多线程下载文件

// Run downloads the file in f.BlockSize parts, one goroutine per part, and
// then merges the parts on disk via MergeFile.
//
// Each part is fetched with an HTTP Range request against f.Url. A failure
// while fetching a part is recorded in that part's Err field instead of
// aborting the other downloads. Run returns an error when f.BlockSize is not
// positive; otherwise it returns the result of MergeFile.
func (f *DownFile) Run() error {
    if f.BlockSize <= 0 {
        return fmt.Errorf("block count must be positive, got %d", f.BlockSize)
    }
    // Each goroutine stores its result at f.Data[j], so the slice must have
    // exactly one slot per part.
    if len(f.Data) != f.BlockSize {
        f.Data = make([]FilePart, f.BlockSize)
    }

    // Ceiling division so that BlockSize parts always cover Size bytes.
    eachBytes := (f.Size + f.BlockSize - 1) / f.BlockSize
    // An http.Client is safe for concurrent use, so one is shared instead of
    // creating a new client in every goroutine.
    client := &http.Client{}

    var waitGroup sync.WaitGroup
    for i := 0; i < f.BlockSize; i++ {
        waitGroup.Add(1)
        go func(j int) {
            defer waitGroup.Done()
            defer fmt.Printf("Task %d finished!\n", j)
            // Every goroutine writes only its own slot, so no lock is needed.
            f.Data[j] = f.fetchPart(client, j, eachBytes)
        }(i)
    }

    waitGroup.Wait()
    return f.MergeFile()
}

// fetchPart downloads part id (eachBytes bytes wide) with a Range request and
// returns it; any failure is reported through the returned part's Err field.
func (f *DownFile) fetchPart(client *http.Client, id, eachBytes int) FilePart {
    part := FilePart{
        Id:   id,
        Data: []byte{},
        From: id * eachBytes,
        To:   id*eachBytes + eachBytes - 1,
    }
    // Clamp the final part to the last byte of the file.
    if part.To > f.Size-1 {
        part.To = f.Size - 1
    }
    // With more parts than bytes the trailing parts are empty; skip them
    // rather than sending a range the server cannot satisfy.
    if part.From > part.To {
        return part
    }

    request, err := http.NewRequest("GET", f.Url, nil)
    if err != nil {
        part.Err = err
        return part
    }
    request.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", part.From, part.To))

    response, err := client.Do(request)
    if err != nil {
        part.Err = err
        return part
    }
    defer response.Body.Close()

    // Only 206 shows that the server honoured the range. A 200 would carry
    // the whole file and corrupt the merged output.
    if response.StatusCode != http.StatusPartialContent {
        part.Err = fmt.Errorf("part %d: unexpected status %s", id, response.Status)
        return part
    }

    data, err := ioutil.ReadAll(response.Body)
    if err != nil {
        part.Err = fmt.Errorf("part %d: %w", id, err)
        return part
    }
    part.Data = data
    return part
}

合并文件

// MergeFile writes the first f.BlockSize parts of f.Data, in order, to the
// file FileDir/FileName and prints the total number of bytes written.
//
// If any part carries an error, nothing is written and that error is returned
// wrapped with a "Download fail: " prefix. An existing file at the target
// path is truncated before writing. If opening, writing, flushing or closing
// fails, the incomplete output file is removed and the wrapped error is
// returned.
func (f *DownFile) MergeFile() (err error) {
    // Check every part first so a failed download never touches the disk.
    for i := 0; i < f.BlockSize; i++ {
        if partErr := f.Data[i].Err; partErr != nil {
            return fmt.Errorf("Download fail: %w", partErr)
        }
    }

    target := filepath.Join(f.FileDir, f.FileName)
    // O_TRUNC discards old content; without it a longer pre-existing file
    // would keep its stale tail after the new data.
    openFile, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
    if err != nil {
        return fmt.Errorf("Download fail: %w", err)
    }
    defer func() {
        if closeErr := openFile.Close(); closeErr != nil && err == nil {
            err = fmt.Errorf("Download fail: %w", closeErr)
        }
        if err != nil {
            // Best-effort cleanup of the incomplete file; the original
            // failure is the error worth reporting.
            _ = os.Remove(target)
        }
    }()

    writer := bufio.NewWriter(openFile)
    totalBytes := 0
    for i := 0; i < f.BlockSize; i++ {
        nn, writeErr := writer.Write(f.Data[i].Data)
        totalBytes += nn
        if writeErr != nil {
            return fmt.Errorf("Download fail: %w", writeErr)
        }
    }
    fmt.Println("totalBytes: " + strconv.Itoa(totalBytes))

    if flushErr := writer.Flush(); flushErr != nil {
        return fmt.Errorf("Download fail: %w", flushErr)
    }
    return nil
}

测试

下载主函数

// DownloadFile downloads url into fileDir using threads concurrent range
// requests.
//
// The remote size is obtained with GetHeadInfo, a DownFile is prepared with
// NewDownloadFile, and the download is carried out by Run. fileName is
// optional: when given, its first element is passed to NewDownloadFile as the
// output file name; when omitted, an empty name is passed.
//
// The first error from GetHeadInfo, NewDownloadFile or Run is returned.
func DownloadFile(url string, threads int, fileDir string, fileName ...string) error {
    fileSize, err := GetHeadInfo(url)
    if err != nil {
        return err
    }

    // fileName is variadic so that both three- and four-argument call sites
    // compile.
    name := ""
    if len(fileName) > 0 {
        name = fileName[0]
    }

    file, err := NewDownloadFile(url, threads, fileDir, name)
    if err != nil {
        // file is nil on error and must not be dereferenced.
        return err
    }
    file.Size = fileSize
    // Run sends its range requests to file.Url.
    file.Url = url
    return file.Run()
}

测试

package http_range

import "testing"

// TestDownloadFile runs DownloadFile once per table entry as a subtest and
// checks only whether an error was returned, comparing that with wantErr.
func TestDownloadFile(t *testing.T) {
    type args struct {
        url     string
        threads int
        fileDir string
    }
    type testCase struct {
        name    string
        args    args
        wantErr bool
    }

    cases := []testCase{
        // TODO: Add test cases.
        {
            name: "img",
            args: args{
                url:     "https://servicewechat.com/wxa-dev-logic/download_redirect?type=win32_x64&from=mpwiki&download_version=1062301160&version_type=1",
                threads: 64,
                fileDir: "",
            },
            wantErr: false,
        },
    }

    for _, tc := range cases {
        t.Run(tc.name, func(t *testing.T) {
            err := DownloadFile(tc.args.url, tc.args.threads, tc.args.fileDir, "aaa.exe")
            if gotErr := err != nil; gotErr != tc.wantErr {
                t.Errorf("DownloadFile() error = %v, wantErr %v", err, tc.wantErr)
            }
        })
    }
}

问题

部分链接的下载速度的确变快了;另一部分链接可能由于服务端限速策略等原因,并没有明显变快。

参考文献

大文件上传:秒传、断点续传、分片上传

Go进阶49:HTTP断点续传多线程下载原理