add GDrive provider support (#118)
* GDrive provider support * More reliable basedir ownership * Fix mimetype
This commit is contained in:
committed by
Remco Verhoef
parent
d0c7241b31
commit
82493d6dcb
399
vendor/cloud.google.com/go/bigquery/external.go
generated
vendored
Normal file
399
vendor/cloud.google.com/go/bigquery/external.go
generated
vendored
Normal file
@@ -0,0 +1,399 @@
|
||||
// Copyright 2017 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bigquery
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"unicode/utf8"
|
||||
|
||||
bq "google.golang.org/api/bigquery/v2"
|
||||
)
|
||||
|
||||
// DataFormat describes the format of BigQuery table data.
type DataFormat string

// Constants describing the format of BigQuery table data.
const (
	// CSV denotes comma-separated-values format.
	CSV DataFormat = "CSV"
	// Avro denotes Avro format.
	Avro DataFormat = "AVRO"
	// JSON denotes newline-delimited JSON format.
	JSON DataFormat = "NEWLINE_DELIMITED_JSON"
	// DatastoreBackup denotes Datastore backup format.
	DatastoreBackup DataFormat = "DATASTORE_BACKUP"
	// GoogleSheets denotes Google Sheets format.
	GoogleSheets DataFormat = "GOOGLE_SHEETS"
	// Bigtable denotes Bigtable format.
	Bigtable DataFormat = "BIGTABLE"
	// Parquet denotes Parquet format.
	Parquet DataFormat = "PARQUET"
)
|
||||
|
||||
// ExternalData is a table which is stored outside of BigQuery. It is implemented by
// *ExternalDataConfig.
// GCSReference also implements it, for backwards compatibility.
type ExternalData interface {
	// toBQ returns the BigQuery API representation of the external data source.
	toBQ() bq.ExternalDataConfiguration
}
|
||||
|
||||
// ExternalDataConfig describes data external to BigQuery that can be used
// in queries and to create external tables.
type ExternalDataConfig struct {
	// The format of the data. Required.
	SourceFormat DataFormat

	// The fully-qualified URIs that point to your
	// data in Google Cloud. Required.
	//
	// For Google Cloud Storage URIs, each URI can contain one '*' wildcard character
	// and it must come after the 'bucket' name. Size limits related to load jobs
	// apply to external data sources.
	//
	// For Google Cloud Bigtable URIs, exactly one URI can be specified and it has be
	// a fully specified and valid HTTPS URL for a Google Cloud Bigtable table.
	//
	// For Google Cloud Datastore backups, exactly one URI can be specified. Also,
	// the '*' wildcard character is not allowed.
	SourceURIs []string

	// The schema of the data. Required for CSV and JSON; disallowed for the
	// other formats.
	Schema Schema

	// Try to detect schema and format options automatically.
	// Any option specified explicitly will be honored.
	AutoDetect bool

	// The compression type of the data.
	Compression Compression

	// IgnoreUnknownValues causes values not matching the schema to be
	// tolerated. Unknown values are ignored. For CSV this ignores extra values
	// at the end of a line. For JSON this ignores named values that do not
	// match any column name. If this field is not set, records containing
	// unknown values are treated as bad records. The MaxBadRecords field can
	// be used to customize how bad records are handled.
	IgnoreUnknownValues bool

	// MaxBadRecords is the maximum number of bad records that will be ignored
	// when reading data.
	MaxBadRecords int64

	// Additional options for CSV, GoogleSheets and Bigtable formats.
	// May be nil if no format-specific options are needed.
	Options ExternalDataConfigOptions
}
|
||||
|
||||
func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
|
||||
q := bq.ExternalDataConfiguration{
|
||||
SourceFormat: string(e.SourceFormat),
|
||||
SourceUris: e.SourceURIs,
|
||||
Autodetect: e.AutoDetect,
|
||||
Compression: string(e.Compression),
|
||||
IgnoreUnknownValues: e.IgnoreUnknownValues,
|
||||
MaxBadRecords: e.MaxBadRecords,
|
||||
}
|
||||
if e.Schema != nil {
|
||||
q.Schema = e.Schema.toBQ()
|
||||
}
|
||||
if e.Options != nil {
|
||||
e.Options.populateExternalDataConfig(&q)
|
||||
}
|
||||
return q
|
||||
}
|
||||
|
||||
func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfig, error) {
|
||||
e := &ExternalDataConfig{
|
||||
SourceFormat: DataFormat(q.SourceFormat),
|
||||
SourceURIs: q.SourceUris,
|
||||
AutoDetect: q.Autodetect,
|
||||
Compression: Compression(q.Compression),
|
||||
IgnoreUnknownValues: q.IgnoreUnknownValues,
|
||||
MaxBadRecords: q.MaxBadRecords,
|
||||
Schema: bqToSchema(q.Schema),
|
||||
}
|
||||
switch {
|
||||
case q.CsvOptions != nil:
|
||||
e.Options = bqToCSVOptions(q.CsvOptions)
|
||||
case q.GoogleSheetsOptions != nil:
|
||||
e.Options = bqToGoogleSheetsOptions(q.GoogleSheetsOptions)
|
||||
case q.BigtableOptions != nil:
|
||||
var err error
|
||||
e.Options, err = bqToBigtableOptions(q.BigtableOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// ExternalDataConfigOptions are additional options for external data configurations.
// This interface is implemented by CSVOptions, GoogleSheetsOptions and BigtableOptions.
type ExternalDataConfigOptions interface {
	// populateExternalDataConfig sets the format-specific options on the
	// given BigQuery API configuration.
	populateExternalDataConfig(*bq.ExternalDataConfiguration)
}
|
||||
|
||||
// CSVOptions are additional options for CSV external data sources.
type CSVOptions struct {
	// AllowJaggedRows causes missing trailing optional columns to be tolerated
	// when reading CSV data. Missing values are treated as nulls.
	AllowJaggedRows bool

	// AllowQuotedNewlines sets whether quoted data sections containing
	// newlines are allowed when reading CSV data.
	AllowQuotedNewlines bool

	// Encoding is the character encoding of data to be read.
	Encoding Encoding

	// FieldDelimiter is the separator for fields in a CSV file, used when
	// reading or exporting data. The default is ",".
	FieldDelimiter string

	// Quote is the value used to quote data sections in a CSV file. The
	// default quotation character is the double quote ("), which is used if
	// both Quote and ForceZeroQuote are unset.
	// To specify that no character should be interpreted as a quotation
	// character, set ForceZeroQuote to true.
	// Only used when reading data.
	Quote string
	// ForceZeroQuote, when true, means no character is interpreted as a
	// quotation character (see Quote above).
	ForceZeroQuote bool

	// The number of rows at the top of a CSV file that BigQuery will skip when
	// reading data.
	SkipLeadingRows int64
}
|
||||
|
||||
func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) {
|
||||
c.CsvOptions = &bq.CsvOptions{
|
||||
AllowJaggedRows: o.AllowJaggedRows,
|
||||
AllowQuotedNewlines: o.AllowQuotedNewlines,
|
||||
Encoding: string(o.Encoding),
|
||||
FieldDelimiter: o.FieldDelimiter,
|
||||
Quote: o.quote(),
|
||||
SkipLeadingRows: o.SkipLeadingRows,
|
||||
}
|
||||
}
|
||||
|
||||
// quote returns the CSV quote character, or nil if unset.
|
||||
func (o *CSVOptions) quote() *string {
|
||||
if o.ForceZeroQuote {
|
||||
quote := ""
|
||||
return "e
|
||||
}
|
||||
if o.Quote == "" {
|
||||
return nil
|
||||
}
|
||||
return &o.Quote
|
||||
}
|
||||
|
||||
func (o *CSVOptions) setQuote(ps *string) {
|
||||
if ps != nil {
|
||||
o.Quote = *ps
|
||||
if o.Quote == "" {
|
||||
o.ForceZeroQuote = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func bqToCSVOptions(q *bq.CsvOptions) *CSVOptions {
|
||||
o := &CSVOptions{
|
||||
AllowJaggedRows: q.AllowJaggedRows,
|
||||
AllowQuotedNewlines: q.AllowQuotedNewlines,
|
||||
Encoding: Encoding(q.Encoding),
|
||||
FieldDelimiter: q.FieldDelimiter,
|
||||
SkipLeadingRows: q.SkipLeadingRows,
|
||||
}
|
||||
o.setQuote(q.Quote)
|
||||
return o
|
||||
}
|
||||
|
||||
// GoogleSheetsOptions are additional options for GoogleSheets external data sources.
type GoogleSheetsOptions struct {
	// The number of rows at the top of a sheet that BigQuery will skip when
	// reading data.
	SkipLeadingRows int64
}
|
||||
|
||||
func (o *GoogleSheetsOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) {
|
||||
c.GoogleSheetsOptions = &bq.GoogleSheetsOptions{
|
||||
SkipLeadingRows: o.SkipLeadingRows,
|
||||
}
|
||||
}
|
||||
|
||||
func bqToGoogleSheetsOptions(q *bq.GoogleSheetsOptions) *GoogleSheetsOptions {
|
||||
return &GoogleSheetsOptions{
|
||||
SkipLeadingRows: q.SkipLeadingRows,
|
||||
}
|
||||
}
|
||||
|
||||
// BigtableOptions are additional options for Bigtable external data sources.
type BigtableOptions struct {
	// A list of column families to expose in the table schema along with their
	// types. If omitted, all column families are present in the table schema and
	// their values are read as BYTES.
	ColumnFamilies []*BigtableColumnFamily

	// If true, then the column families that are not specified in columnFamilies
	// list are not exposed in the table schema. Otherwise, they are read with BYTES
	// type values. The default is false.
	IgnoreUnspecifiedColumnFamilies bool

	// If true, then the rowkey column families will be read and converted to string.
	// Otherwise they are read with BYTES type values and users need to manually cast
	// them with CAST if necessary. The default is false.
	ReadRowkeyAsString bool
}
|
||||
|
||||
func (o *BigtableOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) {
|
||||
q := &bq.BigtableOptions{
|
||||
IgnoreUnspecifiedColumnFamilies: o.IgnoreUnspecifiedColumnFamilies,
|
||||
ReadRowkeyAsString: o.ReadRowkeyAsString,
|
||||
}
|
||||
for _, f := range o.ColumnFamilies {
|
||||
q.ColumnFamilies = append(q.ColumnFamilies, f.toBQ())
|
||||
}
|
||||
c.BigtableOptions = q
|
||||
}
|
||||
|
||||
func bqToBigtableOptions(q *bq.BigtableOptions) (*BigtableOptions, error) {
|
||||
b := &BigtableOptions{
|
||||
IgnoreUnspecifiedColumnFamilies: q.IgnoreUnspecifiedColumnFamilies,
|
||||
ReadRowkeyAsString: q.ReadRowkeyAsString,
|
||||
}
|
||||
for _, f := range q.ColumnFamilies {
|
||||
f2, err := bqToBigtableColumnFamily(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b.ColumnFamilies = append(b.ColumnFamilies, f2)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// BigtableColumnFamily describes how BigQuery should access a Bigtable column family.
type BigtableColumnFamily struct {
	// Identifier of the column family.
	FamilyID string

	// Lists of columns that should be exposed as individual fields as opposed to a
	// list of (column name, value) pairs. All columns whose qualifier matches a
	// qualifier in this list can be accessed as individual fields
	// (presumably as <family>.<qualifier> — original comment appears to have lost
	// markup here; TODO confirm against upstream docs). Other columns can be
	// accessed as a list through the family's Column field.
	Columns []*BigtableColumn

	// The encoding of the values when the type is not STRING. Acceptable encoding values are:
	// - TEXT - indicates values are alphanumeric text strings.
	// - BINARY - indicates values are encoded using HBase Bytes.toBytes family of functions.
	// This can be overridden for a specific column by listing that column in 'columns' and
	// specifying an encoding for it.
	Encoding string

	// If true, only the latest version of values are exposed for all columns in this
	// column family. This can be overridden for a specific column by listing that
	// column in 'columns' and specifying a different setting for that column.
	OnlyReadLatest bool

	// The type to convert the value in cells of this
	// column family. The values are expected to be encoded using HBase
	// Bytes.toBytes function when using the BINARY encoding value.
	// Following BigQuery types are allowed (case-sensitive):
	// BYTES STRING INTEGER FLOAT BOOLEAN.
	// The default type is BYTES. This can be overridden for a specific column by
	// listing that column in 'columns' and specifying a type for it.
	Type string
}
|
||||
|
||||
func (b *BigtableColumnFamily) toBQ() *bq.BigtableColumnFamily {
|
||||
q := &bq.BigtableColumnFamily{
|
||||
FamilyId: b.FamilyID,
|
||||
Encoding: b.Encoding,
|
||||
OnlyReadLatest: b.OnlyReadLatest,
|
||||
Type: b.Type,
|
||||
}
|
||||
for _, col := range b.Columns {
|
||||
q.Columns = append(q.Columns, col.toBQ())
|
||||
}
|
||||
return q
|
||||
}
|
||||
|
||||
func bqToBigtableColumnFamily(q *bq.BigtableColumnFamily) (*BigtableColumnFamily, error) {
|
||||
b := &BigtableColumnFamily{
|
||||
FamilyID: q.FamilyId,
|
||||
Encoding: q.Encoding,
|
||||
OnlyReadLatest: q.OnlyReadLatest,
|
||||
Type: q.Type,
|
||||
}
|
||||
for _, col := range q.Columns {
|
||||
c, err := bqToBigtableColumn(col)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b.Columns = append(b.Columns, c)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// BigtableColumn describes how BigQuery should access a Bigtable column.
type BigtableColumn struct {
	// Qualifier of the column. Columns in the parent column family that have this
	// exact qualifier are exposed as an individual field (presumably named
	// <family>.<qualifier> — original comment appears to have lost markup here;
	// TODO confirm against upstream docs). The column field name is the
	// same as the column qualifier.
	Qualifier string

	// If the qualifier is not a valid BigQuery field identifier i.e. does not match
	// [a-zA-Z][a-zA-Z0-9_]*, a valid identifier must be provided as the column field
	// name and is used as field name in queries.
	FieldName string

	// If true, only the latest version of values are exposed for this column.
	// See BigtableColumnFamily.OnlyReadLatest.
	OnlyReadLatest bool

	// The encoding of the values when the type is not STRING.
	// See BigtableColumnFamily.Encoding
	Encoding string

	// The type to convert the value in cells of this column.
	// See BigtableColumnFamily.Type
	Type string
}
|
||||
|
||||
func (b *BigtableColumn) toBQ() *bq.BigtableColumn {
|
||||
q := &bq.BigtableColumn{
|
||||
FieldName: b.FieldName,
|
||||
OnlyReadLatest: b.OnlyReadLatest,
|
||||
Encoding: b.Encoding,
|
||||
Type: b.Type,
|
||||
}
|
||||
if utf8.ValidString(b.Qualifier) {
|
||||
q.QualifierString = b.Qualifier
|
||||
} else {
|
||||
q.QualifierEncoded = base64.RawStdEncoding.EncodeToString([]byte(b.Qualifier))
|
||||
}
|
||||
return q
|
||||
}
|
||||
|
||||
func bqToBigtableColumn(q *bq.BigtableColumn) (*BigtableColumn, error) {
|
||||
b := &BigtableColumn{
|
||||
FieldName: q.FieldName,
|
||||
OnlyReadLatest: q.OnlyReadLatest,
|
||||
Encoding: q.Encoding,
|
||||
Type: q.Type,
|
||||
}
|
||||
if q.QualifierString != "" {
|
||||
b.Qualifier = q.QualifierString
|
||||
} else {
|
||||
bytes, err := base64.RawStdEncoding.DecodeString(q.QualifierEncoded)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b.Qualifier = string(bytes)
|
||||
}
|
||||
return b, nil
|
||||
}
|
Reference in New Issue
Block a user