cmd/swarm, swarm: cross-platform Content-Type detection (#17782)

- Mime types generator (Standard "mime" package rely on system-settings, see mime.osInitMime)
- Changed swarm/api.Upload:
    - simplify I/O throttling by semaphore primitive and use file name where possible
    - f.Close() must be called in Defer - otherwise panic or future added early return will cause leak of file descriptors
    - one error was suppressed
This commit is contained in:
Alexey Sharov
2018-10-01 18:39:39 +07:00
committed by Anton Evangelatov
parent b69942befe
commit dc5d643bb5
13 changed files with 3379 additions and 90 deletions

View File

@ -16,6 +16,9 @@
package api
//go:generate mimegen --types=./../../cmd/swarm/mimegen/mime.types --package=api --out=gen_mime.go
//go:generate gofmt -s -w gen_mime.go
import (
"archive/tar"
"context"
@ -29,8 +32,6 @@ import (
"path"
"strings"
"github.com/ethereum/go-ethereum/swarm/storage/mru/lookup"
"bytes"
"mime"
"path/filepath"
@ -45,7 +46,8 @@ import (
"github.com/ethereum/go-ethereum/swarm/spancontext"
"github.com/ethereum/go-ethereum/swarm/storage"
"github.com/ethereum/go-ethereum/swarm/storage/mru"
opentracing "github.com/opentracing/opentracing-go"
"github.com/ethereum/go-ethereum/swarm/storage/mru/lookup"
"github.com/opentracing/opentracing-go"
)
var (
@ -757,9 +759,14 @@ func (a *API) UploadTar(ctx context.Context, bodyReader io.ReadCloser, manifestP
// add the entry under the path from the request
manifestPath := path.Join(manifestPath, hdr.Name)
contentType := hdr.Xattrs["user.swarm.content-type"]
if contentType == "" {
contentType = mime.TypeByExtension(filepath.Ext(hdr.Name))
}
//DetectContentType("")
entry := &ManifestEntry{
Path: manifestPath,
ContentType: hdr.Xattrs["user.swarm.content-type"],
ContentType: contentType,
Mode: hdr.Mode,
Size: hdr.Size,
ModTime: hdr.ModTime,
@ -770,10 +777,15 @@ func (a *API) UploadTar(ctx context.Context, bodyReader io.ReadCloser, manifestP
return nil, fmt.Errorf("error adding manifest entry from tar stream: %s", err)
}
if hdr.Name == defaultPath {
contentType := hdr.Xattrs["user.swarm.content-type"]
if contentType == "" {
contentType = mime.TypeByExtension(filepath.Ext(hdr.Name))
}
entry := &ManifestEntry{
Hash: contentKey.Hex(),
Path: "", // default entry
ContentType: hdr.Xattrs["user.swarm.content-type"],
ContentType: contentType,
Mode: hdr.Mode,
Size: hdr.Size,
ModTime: hdr.ModTime,
@ -1033,3 +1045,32 @@ func (a *API) ResolveResourceView(ctx context.Context, uri *URI, values mru.Valu
}
return view, nil
}
// MimeOctetStream default value of http Content-Type header
const MimeOctetStream = "application/octet-stream"
// DetectContentType by file file extension, or fallback to content sniff
func DetectContentType(fileName string, f io.ReadSeeker) (string, error) {
ctype := mime.TypeByExtension(filepath.Ext(fileName))
if ctype != "" {
return ctype, nil
}
// save/rollback to get content probe from begin of file
currentPosition, err := f.Seek(0, io.SeekCurrent)
if err != nil {
return MimeOctetStream, fmt.Errorf("seeker can't seek, %s", err)
}
// read a chunk to decide between utf-8 text and binary
var buf [512]byte
n, _ := f.Read(buf[:])
ctype = http.DetectContentType(buf[:n])
_, err = f.Seek(currentPosition, io.SeekStart) // rewind to output whole file
if err != nil {
return MimeOctetStream, fmt.Errorf("seeker can't seek, %s", err)
}
return ctype, nil
}

View File

@ -17,6 +17,7 @@
package api
import (
"bytes"
"context"
"errors"
"flag"
@ -433,3 +434,69 @@ func TestDecryptOrigin(t *testing.T) {
}
}
}
func TestDetectContentType(t *testing.T) {
for _, tc := range []struct {
file string
content string
expectedContentType string
}{
{
file: "file-with-correct-css.css",
content: "body {background-color: orange}",
expectedContentType: "text/css; charset=utf-8",
},
{
file: "empty-file.css",
content: "",
expectedContentType: "text/css; charset=utf-8",
},
{
file: "empty-file.pdf",
content: "",
expectedContentType: "application/pdf",
},
{
file: "empty-file.md",
content: "",
expectedContentType: "text/markdown; charset=utf-8",
},
{
file: "empty-file-with-unknown-content.strangeext",
content: "",
expectedContentType: "text/plain; charset=utf-8",
},
{
file: "file-with-unknown-extension-and-content.strangeext",
content: "Lorem Ipsum",
expectedContentType: "text/plain; charset=utf-8",
},
{
file: "file-no-extension",
content: "Lorem Ipsum",
expectedContentType: "text/plain; charset=utf-8",
},
{
file: "file-no-extension-no-content",
content: "",
expectedContentType: "text/plain; charset=utf-8",
},
{
file: "css-file-with-html-inside.css",
content: "<!doctype html><html><head></head><body></body></html>",
expectedContentType: "text/css; charset=utf-8",
},
} {
t.Run(tc.file, func(t *testing.T) {
detected, err := DetectContentType(tc.file, bytes.NewReader([]byte(tc.content)))
if err != nil {
t.Fatal(err)
}
if detected != tc.expectedContentType {
t.Fatalf("File: %s, Expected mime type %s, got %s", tc.file, tc.expectedContentType, detected)
}
})
}
}

View File

@ -24,7 +24,6 @@ import (
"fmt"
"io"
"io/ioutil"
"mime"
"mime/multipart"
"net/http"
"net/textproto"
@ -124,10 +123,16 @@ func Open(path string) (*File, error) {
f.Close()
return nil, err
}
contentType, err := api.DetectContentType(f.Name(), f)
if err != nil {
return nil, err
}
return &File{
ReadCloser: f,
ManifestEntry: api.ManifestEntry{
ContentType: mime.TypeByExtension(filepath.Ext(path)),
ContentType: contentType,
Mode: int64(stat.Mode()),
Size: stat.Size(),
ModTime: stat.ModTime(),

View File

@ -21,7 +21,6 @@ import (
"context"
"fmt"
"io"
"net/http"
"os"
"path"
"path/filepath"
@ -97,51 +96,50 @@ func (fs *FileSystem) Upload(lpath, index string, toEncrypt bool) (string, error
list = append(list, entry)
}
cnt := len(list)
errors := make([]error, cnt)
done := make(chan bool, maxParallelFiles)
dcnt := 0
awg := &sync.WaitGroup{}
errors := make([]error, len(list))
sem := make(chan bool, maxParallelFiles)
defer close(sem)
for i, entry := range list {
if i >= dcnt+maxParallelFiles {
<-done
dcnt++
}
awg.Add(1)
go func(i int, entry *manifestTrieEntry, done chan bool) {
sem <- true
go func(i int, entry *manifestTrieEntry) {
defer func() { <-sem }()
f, err := os.Open(entry.Path)
if err == nil {
stat, _ := f.Stat()
var hash storage.Address
var wait func(context.Context) error
ctx := context.TODO()
hash, wait, err = fs.api.fileStore.Store(ctx, f, stat.Size(), toEncrypt)
if hash != nil {
list[i].Hash = hash.Hex()
}
err = wait(ctx)
awg.Done()
if err == nil {
first512 := make([]byte, 512)
fread, _ := f.ReadAt(first512, 0)
if fread > 0 {
mimeType := http.DetectContentType(first512[:fread])
if filepath.Ext(entry.Path) == ".css" {
mimeType = "text/css"
}
list[i].ContentType = mimeType
}
}
f.Close()
if err != nil {
errors[i] = err
return
}
errors[i] = err
done <- true
}(i, entry, done)
defer f.Close()
stat, err := f.Stat()
if err != nil {
errors[i] = err
return
}
var hash storage.Address
var wait func(context.Context) error
ctx := context.TODO()
hash, wait, err = fs.api.fileStore.Store(ctx, f, stat.Size(), toEncrypt)
if hash != nil {
list[i].Hash = hash.Hex()
}
if err := wait(ctx); err != nil {
errors[i] = err
return
}
list[i].ContentType, err = DetectContentType(f.Name(), f)
if err != nil {
errors[i] = err
return
}
}(i, entry)
}
for dcnt < cnt {
<-done
dcnt++
for i := 0; i < cap(sem); i++ {
sem <- true
}
trie := &manifestTrie{
@ -168,7 +166,6 @@ func (fs *FileSystem) Upload(lpath, index string, toEncrypt bool) (string, error
if err2 == nil {
hs = trie.ref.Hex()
}
awg.Wait()
return hs, err2
}

View File

@ -60,7 +60,7 @@ func TestApiDirUpload0(t *testing.T) {
content = readPath(t, "testdata", "test0", "index.css")
resp = testGet(t, api, bzzhash, "index.css")
exp = expResponse(content, "text/css", 0)
exp = expResponse(content, "text/css; charset=utf-8", 0)
checkResponse(t, resp, exp)
addr := storage.Address(common.Hex2Bytes(bzzhash))
@ -140,7 +140,7 @@ func TestApiDirUploadModify(t *testing.T) {
content = readPath(t, "testdata", "test0", "index.css")
resp = testGet(t, api, bzzhash, "index.css")
exp = expResponse(content, "text/css", 0)
exp = expResponse(content, "text/css; charset=utf-8", 0)
checkResponse(t, resp, exp)
_, _, _, _, err = api.Get(context.TODO(), nil, addr, "")

1201
swarm/api/gen_mime.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -201,6 +201,13 @@ func (s *Server) HandleBzzGet(w http.ResponseWriter, r *http.Request) {
defer reader.Close()
w.Header().Set("Content-Type", "application/x-tar")
fileName := uri.Addr
if found := path.Base(uri.Path); found != "" && found != "." && found != "/" {
fileName = found
}
w.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=\"%s.tar\"", fileName))
w.WriteHeader(http.StatusOK)
io.Copy(w, reader)
return
@ -616,7 +623,7 @@ func (s *Server) HandleGetResource(w http.ResponseWriter, r *http.Request) {
// All ok, serve the retrieved update
log.Debug("Found update", "view", view.Hex(), "ruid", ruid)
w.Header().Set("Content-Type", "application/octet-stream")
w.Header().Set("Content-Type", api.MimeOctetStream)
http.ServeContent(w, r, "", time.Now(), bytes.NewReader(data))
}
@ -690,11 +697,9 @@ func (s *Server) HandleGet(w http.ResponseWriter, r *http.Request) {
case uri.Raw():
// allow the request to overwrite the content type using a query
// parameter
contentType := "application/octet-stream"
if typ := r.URL.Query().Get("content_type"); typ != "" {
contentType = typ
w.Header().Set("Content-Type", typ)
}
w.Header().Set("Content-Type", contentType)
http.ServeContent(w, r, "", time.Now(), reader)
case uri.Hash():
w.Header().Set("Content-Type", "text/plain")
@ -850,8 +855,17 @@ func (s *Server) HandleGetFile(w http.ResponseWriter, r *http.Request) {
return
}
w.Header().Set("Content-Type", contentType)
http.ServeContent(w, r, "", time.Now(), newBufferedReadSeeker(reader, getFileBufferSize))
if contentType != "" {
w.Header().Set("Content-Type", contentType)
}
fileName := uri.Addr
if found := path.Base(uri.Path); found != "" && found != "." && found != "/" {
fileName = found
}
w.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=\"%s\"", fileName))
http.ServeContent(w, r, fileName, time.Now(), newBufferedReadSeeker(reader, getFileBufferSize))
}
// The size of buffer used for bufio.Reader on LazyChunkReader passed to

View File

@ -32,6 +32,7 @@ import (
"net/http"
"net/url"
"os"
"path"
"strconv"
"strings"
"testing"
@ -764,6 +765,16 @@ func testBzzTar(encrypted bool, t *testing.T) {
}
defer resp2.Body.Close()
if h := resp2.Header.Get("Content-Type"); h != "application/x-tar" {
t.Fatalf("Content-Type header expected: application/x-tar, got: %s", h)
}
expectedFileName := string(swarmHash) + ".tar"
expectedContentDisposition := fmt.Sprintf("inline; filename=\"%s\"", expectedFileName)
if h := resp2.Header.Get("Content-Disposition"); h != expectedContentDisposition {
t.Fatalf("Content-Disposition header expected: %s, got: %s", expectedContentDisposition, h)
}
file, err := ioutil.TempFile("", "swarm-downloaded-tarball")
if err != nil {
t.Fatal(err)
@ -1099,7 +1110,7 @@ func TestModify(t *testing.T) {
res, body := httpDo(testCase.method, testCase.uri, reqBody, testCase.headers, testCase.verbose, t)
if res.StatusCode != testCase.expectedStatusCode {
t.Fatalf("expected status code %d but got %d", testCase.expectedStatusCode, res.StatusCode)
t.Fatalf("expected status code %d but got %d, %s", testCase.expectedStatusCode, res.StatusCode, body)
}
if testCase.assertResponseBody != "" && !strings.Contains(body, testCase.assertResponseBody) {
t.Log(body)
@ -1210,19 +1221,25 @@ func TestBzzGetFileWithResolver(t *testing.T) {
hash := common.HexToHash(string(swarmHash))
resolver.hash = &hash
for _, v := range []struct {
addr string
path string
expectedStatusCode int
addr string
path string
expectedStatusCode int
expectedContentType string
expectedFileName string
}{
{
addr: string(swarmHash),
path: fileNames[0],
expectedStatusCode: http.StatusOK,
addr: string(swarmHash),
path: fileNames[0],
expectedStatusCode: http.StatusOK,
expectedContentType: "text/plain",
expectedFileName: path.Base(fileNames[0]),
},
{
addr: "somebogusensname",
path: fileNames[0],
expectedStatusCode: http.StatusOK,
addr: "somebogusensname",
path: fileNames[0],
expectedStatusCode: http.StatusOK,
expectedContentType: "text/plain",
expectedFileName: path.Base(fileNames[0]),
},
} {
req, err := http.NewRequest("GET", fmt.Sprintf(srv.URL+"/bzz:/%s/%s", v.addr, v.path), nil)
@ -1237,6 +1254,16 @@ func TestBzzGetFileWithResolver(t *testing.T) {
if serverResponse.StatusCode != v.expectedStatusCode {
t.Fatalf("expected %d, got %d", v.expectedStatusCode, serverResponse.StatusCode)
}
if h := serverResponse.Header.Get("Content-Type"); h != v.expectedContentType {
t.Fatalf("Content-Type header expected: %s, got %s", v.expectedContentType, h)
}
expectedContentDisposition := fmt.Sprintf("inline; filename=\"%s\"", v.expectedFileName)
if h := serverResponse.Header.Get("Content-Disposition"); h != expectedContentDisposition {
t.Fatalf("Content-Disposition header expected: %s, got: %s", expectedContentDisposition, h)
}
}
}