用最小的内存发送大文件|用最小的内存发送大文件 翻译+分析

【用最小的内存发送大文件|用最小的内存发送大文件 翻译+分析】原文:
https://medium.com/@owlwalks/sending-big-file-with-minimal-memory-in-golang-8f3fc280d2c
一般我们会像下面这样发送文件:

buf := new(bytes.Buffer)writer := multipart.NewWriter(buf)defer writer.Close()part, err := writer.CreateFormFile("myFile", "foo.txt")if err != nil {return err}file, err := os.Open(name)if err != nil {return err}defer file.Close()if _, err = io.Copy(part, file); err != nil {return err}http.Post(url, writer.FormDataContentType(), buf)

这样buf会读取文件的所有内容,假如文件非常大,内存占用就会比较大
优化方法
r, w := io.Pipe() m := multipart.NewWriter(w) go func() { defer w.Close() defer m.Close() part, err := m.CreateFormFile("myFile", "foo.txt") if err != nil { return } file, err := os.Open(name) if err != nil { return } defer file.Close() if _, err = io.Copy(part, file); err != nil { return } }() http.Post(url, m.FormDataContentType(), r)

上述代码是从原文拷贝的,下面分析一下原因
net/http 中
func Post(url, contentType string, body io.Reader) (resp *Response, err error) { return DefaultClient.Post(url, contentType, body) } func (c *Client) Post(url, contentType string, body io.Reader) (resp *Response, err error) { req, err := NewRequest("POST", url, body) if err != nil { return nil, err } req.Header.Set("Content-Type", contentType) return c.Do(req) } func NewRequest(method, url string, body io.Reader) (*Request, error) { ... if body != nil { switch v := body.(type) { case *bytes.Buffer: req.ContentLength = int64(v.Len()) buf := v.Bytes() req.GetBody = func() (io.ReadCloser, error) { r := bytes.NewReader(buf) return ioutil.NopCloser(r), nil } case *bytes.Reader: req.ContentLength = int64(v.Len()) snapshot := *v req.GetBody = func() (io.ReadCloser, error) { r := snapshot return ioutil.NopCloser(&r), nil } case *strings.Reader: req.ContentLength = int64(v.Len()) snapshot := *v req.GetBody = func() (io.ReadCloser, error) { r := snapshot return ioutil.NopCloser(&r), nil } default: // This is where we'd set it to -1 (at least // if body != NoBody) to mean unknown, but // that broke people during the Go 1.8 testing // period. People depend on it being 0 I // guess. Maybe retry later. See Issue 18117. } ... }

os中
func Pipe() (r *File, w *File, err error) { var p [2]int e := syscall.Pipe2(p[0:], syscall.O_CLOEXEC) // pipe2 was added in 2.6.27 and our minimum requirement is 2.6.23, so it // might not be implemented. if e == syscall.ENOSYS { // See ../syscall/exec.go for description of lock. syscall.ForkLock.RLock() e = syscall.Pipe(p[0:]) if e != nil { syscall.ForkLock.RUnlock() return nil, nil, NewSyscallError("pipe", e) } syscall.CloseOnExec(p[0]) syscall.CloseOnExec(p[1]) syscall.ForkLock.RUnlock() } else if e != nil { return nil, nil, NewSyscallError("pipe2", e) } return newFile(uintptr(p[0]), "|0", kindPipe), newFile(uintptr(p[1]), "|1", kindPipe), nil }

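可见Pipe返回的类型在body的类型判断中进入了default的逻辑(注意:上面贴的是os.Pipe的实现,而示例代码实际调用的是io.Pipe,它返回的是*io.PipeReader和*io.PipeWriter;但无论哪一种,都不属于*bytes.Buffer、*bytes.Reader、*strings.Reader,因此都会进入default分支)。继续追溯Post方法的调用链,请求体最终会在下面这个函数中写出: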
func (t *transferWriter) writeBody(w io.Writer) error { ... if t.Body != nil { var body = transferBodyReader{t} if chunked(t.TransferEncoding) { if bw, ok := w.(*bufio.Writer); ok && !t.IsResponse { w = &internal.FlushAfterChunkWriter{Writer: bw} } cw := internal.NewChunkedWriter(w) _, err = io.Copy(cw, body) if err == nil { err = cw.Close() } } else if t.ContentLength == -1 { ncopy, err = io.Copy(w, body) } else { ncopy, err = io.Copy(w, io.LimitReader(body, t.ContentLength)) if err != nil { return err } var nextra int64 nextra, err = io.Copy(ioutil.Discard, body) ncopy += nextra } ... }

由于body不为nil,而ContentLength为0,net/http在发送请求时会把这种情况视为长度未知(-1),并使用chunked传输编码,所以实际进入的是第一个分支,通过io.Copy(cw, body)一边从body读取一边写出;这样也就形成了流式读取和流式写入,在发送大文件的时候可以节省内存

    推荐阅读