talent-plan-tinykv/kv/coprocessor/analyze.go
理工男 9d03198268
Update analyze.go (#391)
Copyright is duplicated
2022-04-20 09:36:49 +00:00

306 lines
8.2 KiB
Go

// Copyright 2019-present PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package coprocessor
import (
"bytes"
"time"
"github.com/Connor1996/badger/y"
"github.com/golang/protobuf/proto"
"github.com/juju/errors"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/pingcap-incubator/tinykv/kv/transaction/mvcc"
"github.com/pingcap-incubator/tinykv/proto/pkg/coprocessor"
"github.com/pingcap-incubator/tinykv/kv/coprocessor/rowcodec"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tipb/go-tipb"
"golang.org/x/net/context"
)
// HandleCopAnalyzeRequest serves a coprocessor ANALYZE request read through
// reader.
//
// A request with no ranges, or whose type is not kv.ReqTypeAnalyze, yields an
// empty response. Otherwise req.Data is decoded as a tipb.AnalyzeReq, the
// request ranges are converted with extractKVRanges, and the single resulting
// range is analyzed either as an index (AnalyzeType_TypeIndex) or as table
// columns (any other type).
//
// Failures are never returned as Go errors: they are reported through the
// OtherError field of the returned response.
func (svr *CopHandler) HandleCopAnalyzeRequest(reader storage.StorageReader, req *coprocessor.Request) *coprocessor.Response {
	// Nothing to analyze, or not an analyze request at all.
	if len(req.Ranges) == 0 || req.GetTp() != kv.ReqTypeAnalyze {
		return &coprocessor.Response{}
	}

	analyzeReq := &tipb.AnalyzeReq{}
	if err := proto.Unmarshal(req.Data, analyzeReq); err != nil {
		return &coprocessor.Response{OtherError: err.Error()}
	}

	ranges, err := svr.extractKVRanges(reader, req.Ranges, false)
	if err != nil {
		return &coprocessor.Response{OtherError: err.Error()}
	}
	// The handlers below work on exactly one key range.
	y.Assert(len(ranges) == 1)

	var resp *coprocessor.Response
	switch analyzeReq.Tp {
	case tipb.AnalyzeType_TypeIndex:
		resp, err = svr.handleAnalyzeIndexReq(reader, ranges[0], analyzeReq, req.StartTs)
	default:
		resp, err = svr.handleAnalyzeColumnsReq(reader, ranges[0], analyzeReq, req.StartTs)
	}
	if err != nil {
		return &coprocessor.Response{OtherError: err.Error()}
	}
	return resp
}
// handleAnalyzeIndexReq builds the statistics for an index ANALYZE request.
//
// It scans keys starting at ran.StartKey as of startTS, stops at the first key
// that is >= ran.EndKey (or when the scanner is exhausted), and feeds every
// key/value pair to an analyzeIndexProcessor. The processor's histogram, and
// its CM sketch when both CmsketchDepth and CmsketchWidth were supplied, are
// marshalled as a tipb.AnalyzeIndexResp into the Data field of the response.
//
// An error is returned when the request carries no IdxReq payload, when
// scanning or processing a key fails, or when the response cannot be
// marshalled.
func (svr *CopHandler) handleAnalyzeIndexReq(reader storage.StorageReader, ran kv.KeyRange, analyzeReq *tipb.AnalyzeReq, startTS uint64) (*coprocessor.Response, error) {
	idxReq := analyzeReq.IdxReq
	if idxReq == nil {
		// The request is typed as an index analyze but has no index payload;
		// report it instead of dereferencing a nil pointer below.
		return nil, errors.New("analyze index request is missing IdxReq")
	}

	processor := &analyzeIndexProcessor{
		colLen:       int(idxReq.NumColumns),
		statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), idxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob)),
	}
	// The CM sketch is optional: it is only built when both dimensions are given.
	if idxReq.CmsketchDepth != nil && idxReq.CmsketchWidth != nil {
		processor.cms = statistics.NewCMSketch(*idxReq.CmsketchDepth, *idxReq.CmsketchWidth)
	}

	txn := mvcc.MvccTxn{Reader: reader, StartTS: startTS}
	scanner := mvcc.NewScanner(ran.StartKey, &txn)
	defer scanner.Close()
	for {
		key, val, err := scanner.Next()
		if err != nil {
			return nil, err
		}
		// A nil key and nil value together mark the end of the scan.
		if key == nil && val == nil {
			break
		}
		// ran.EndKey is exclusive.
		if bytes.Compare(key, ran.EndKey) >= 0 {
			break
		}
		err = processor.Process(key, val)
		if err != nil {
			// ScanBreak asks for the scan to stop early without failing.
			if err == ScanBreak {
				break
			}
			return nil, err
		}
	}

	hg := statistics.HistogramToProto(processor.statsBuilder.Hist())
	var cm *tipb.CMSketch
	if processor.cms != nil {
		cm = statistics.CMSketchToProto(processor.cms)
	}
	data, err := proto.Marshal(&tipb.AnalyzeIndexResp{Hist: hg, Cms: cm})
	if err != nil {
		return nil, errors.Trace(err)
	}
	return &coprocessor.Response{Data: data}, nil
}
// analyzeIndexProcessor accumulates index statistics from the key/value pairs
// handed to its Process method.
type analyzeIndexProcessor struct {
// skipVal is embedded from elsewhere in this package; its behavior is not
// visible in this file.
skipVal
// colLen is the number of index columns cut out of each key in Process.
colLen int
// statsBuilder receives one bytes datum per processed key and produces the
// histogram returned to the client.
statsBuilder *statistics.SortedBuilder
// cms is the optional CM sketch; it stays nil when the request did not
// supply both a depth and a width, in which case Process skips it.
cms *statistics.CMSketch
// rowBuf is a scratch buffer reused across Process calls to concatenate the
// column values of one key.
rowBuf []byte
}
// Process folds one index entry into the statistics being collected.
//
// The first p.colLen column values are cut out of key, concatenated, and the
// resulting bytes are passed to the histogram builder and, when a CM sketch is
// configured, inserted into the sketch. The value argument is not used.
//
// It returns any error from cutting the key or from the histogram builder.
func (p *analyzeIndexProcessor) Process(key, value []byte) error {
	cols, _, err := tablecodec.CutIndexKeyNew(key, p.colLen)
	if err != nil {
		return err
	}

	// Reuse the scratch buffer to concatenate the column values.
	p.rowBuf = p.rowBuf[:0]
	for _, col := range cols {
		p.rowBuf = append(p.rowBuf, col...)
	}

	// Hand out a private copy, since p.rowBuf is overwritten by the next call.
	datumBytes := make([]byte, len(p.rowBuf))
	copy(datumBytes, p.rowBuf)

	if err := p.statsBuilder.Iterate(types.NewBytesDatum(datumBytes)); err != nil {
		return err
	}
	if p.cms == nil {
		return nil
	}
	p.cms.InsertBytes(datumBytes)
	return nil
}
// analyzeColumnsExec is the record set handed to statistics.SampleBuilder for
// a columns ANALYZE request. Each Next call scans rows from seekKey and
// appends their encoded column values to the caller's chunk.
type analyzeColumnsExec struct {
// skipVal is embedded from elsewhere in this package; its behavior is not
// visible in this file.
skipVal
// reader is the storage reader each scan's MvccTxn is built on.
reader storage.StorageReader
// seekKey is where the next scan starts. Process advances it past the last
// consumed key when the output chunk fills up, and Next sets it to endKey
// once the output chunk ends up not full.
seekKey []byte
// endKey is the exclusive upper bound of the scan.
endKey []byte
// startTS is the timestamp used as the MvccTxn StartTS for every scan.
startTS uint64
// chk is a scratch chunk each row is decoded into before re-encoding; it
// is reset after every row.
chk *chunk.Chunk
// decoder decodes a stored row value into chk.
decoder *rowcodec.Decoder
// req is the output chunk of the Next call currently in progress.
req *chunk.Chunk
// evalCtx supplies the column field types and the statement context used
// when encoding values.
evalCtx *evalContext
// fields are the result fields reported by Fields and used by NewChunk.
fields []*ast.ResultField
}
// handleAnalyzeColumnsReq builds the statistics for a columns ANALYZE request.
//
// It wraps the key range [ran.StartKey, ran.EndKey) read as of startTS in an
// analyzeColumnsExec and lets statistics.SampleBuilder collect the column
// statistics from it. When the first requested column is the primary-key
// handle, it is excluded from the sampled column count and a dedicated sorted
// builder produces its histogram instead. The result is marshalled as a
// tipb.AnalyzeColumnsResp into the Data field of the response.
//
// An error is returned when the request carries no ColReq payload or no column
// info, when the row decoder cannot be created, when collecting the statistics
// fails, or when the response cannot be marshalled.
func (svr *CopHandler) handleAnalyzeColumnsReq(reader storage.StorageReader, ran kv.KeyRange, analyzeReq *tipb.AnalyzeReq, startTS uint64) (*coprocessor.Response, error) {
	colReq := analyzeReq.ColReq
	if colReq == nil {
		// Report a missing payload instead of dereferencing a nil pointer.
		return nil, errors.New("analyze columns request is missing ColReq")
	}
	columns := colReq.ColumnsInfo
	if len(columns) == 0 {
		// columns[0] is inspected below, so an empty list must be rejected.
		return nil, errors.New("analyze columns request has no column info")
	}

	sc := flagsToStatementContext(analyzeReq.Flags)
	sc.TimeZone = time.FixedZone("UTC", int(analyzeReq.TimeZoneOffset))
	evalCtx := &evalContext{sc: sc}
	evalCtx.setColumnInfo(columns)
	decoder, err := evalCtx.newRowDecoder()
	if err != nil {
		return nil, err
	}

	e := &analyzeColumnsExec{
		reader:  reader,
		seekKey: ran.StartKey,
		endKey:  ran.EndKey,
		startTS: startTS,
		chk:     chunk.NewChunkWithCapacity(evalCtx.fieldTps, 1),
		decoder: decoder,
		evalCtx: evalCtx,
	}
	// Every column is exposed to the sample builder as a blob field, because
	// Process appends each value in its encoded byte form.
	e.fields = make([]*ast.ResultField, len(columns))
	for i := range e.fields {
		rf := new(ast.ResultField)
		rf.Column = new(model.ColumnInfo)
		rf.Column.FieldType = types.FieldType{Tp: mysql.TypeBlob, Flen: mysql.MaxBlobWidth, Charset: charset.CharsetUTF8, Collate: charset.CollationUTF8}
		e.fields[i] = rf
	}

	// pkID stays -1 unless the first column is the primary-key handle.
	pkID := int64(-1)
	numCols := len(columns)
	if columns[0].GetPkHandle() {
		pkID = columns[0].ColumnId
		numCols--
	}

	builder := statistics.SampleBuilder{
		Sc:              sc,
		RecordSet:       e,
		ColLen:          numCols,
		MaxBucketSize:   colReq.BucketSize,
		MaxFMSketchSize: colReq.SketchSize,
		MaxSampleSize:   colReq.SampleSize,
	}
	if pkID != -1 {
		builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob))
	}
	// The CM sketch dimensions are optional and only applied when both are set.
	if colReq.CmsketchWidth != nil && colReq.CmsketchDepth != nil {
		builder.CMSketchWidth = *colReq.CmsketchWidth
		builder.CMSketchDepth = *colReq.CmsketchDepth
	}

	collectors, pkBuilder, err := builder.CollectColumnStats()
	if err != nil {
		return nil, errors.Trace(err)
	}

	colResp := &tipb.AnalyzeColumnsResp{}
	if pkID != -1 {
		colResp.PkHist = statistics.HistogramToProto(pkBuilder.Hist())
	}
	for _, c := range collectors {
		colResp.Collectors = append(colResp.Collectors, statistics.SampleCollectorToProto(c))
	}
	data, err := proto.Marshal(colResp)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return &coprocessor.Response{Data: data}, nil
}
// Fields implements the sqlexec.RecordSet Fields interface.
// It returns e.fields as is, the result fields that handleAnalyzeColumnsReq
// populated with one blob-typed entry per requested column.
func (e *analyzeColumnsExec) Fields() []*ast.ResultField {
return e.fields
}
// Next resets req and refills it with rows scanned from e.seekKey.
//
// Keys are read as of e.startTS until the scanner is exhausted, a key >=
// e.endKey is seen, or Process reports ScanBreak because req is full. If req
// ends up not full, e.seekKey is moved to e.endKey. The ctx argument is not
// used.
//
// It returns any error from the scanner or from Process other than ScanBreak.
func (e *analyzeColumnsExec) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	e.req = req

	txn := &mvcc.MvccTxn{Reader: e.reader, StartTS: e.startTS}
	scanner := mvcc.NewScanner(e.seekKey, txn)
	defer scanner.Close()

scan:
	for {
		key, val, err := scanner.Next()
		switch {
		case err != nil:
			return err
		case key == nil && val == nil:
			// The scanner has no more entries.
			break scan
		case bytes.Compare(key, e.endKey) >= 0:
			// e.endKey is exclusive.
			break scan
		}

		if procErr := e.Process(key, val); procErr != nil {
			if procErr == ScanBreak {
				break scan
			}
			return procErr
		}
	}

	// A chunk that is not full means this scan stopped for a reason other
	// than req filling up, so park the cursor at the end key.
	if req.NumRows() < req.Capacity() {
		e.seekKey = e.endKey
	}
	return nil
}
// Process decodes one table row and appends its columns to the output chunk.
//
// The handle is decoded from key and the row from value into the scratch
// chunk e.chk; every column is then re-encoded with codec.EncodeValue and
// appended as bytes to e.req. Once e.req is full, e.seekKey is advanced to the
// prefix-next of key and ScanBreak is returned so the caller stops scanning.
//
// It returns a traced error when decoding the key or the row fails, and the
// plain error when encoding a column value fails.
func (e *analyzeColumnsExec) Process(key, value []byte) error {
	handle, err := tablecodec.DecodeRowKey(key)
	if err != nil {
		return errors.Trace(err)
	}
	if err = e.decoder.Decode(value, handle, e.chk); err != nil {
		return errors.Trace(err)
	}

	decoded := e.chk.GetRow(0)
	for colIdx, fieldTp := range e.evalCtx.fieldTps {
		datum := decoded.GetDatum(colIdx, fieldTp)
		encoded, encErr := codec.EncodeValue(e.evalCtx.sc, nil, datum)
		if encErr != nil {
			return encErr
		}
		e.req.AppendBytes(colIdx, encoded)
	}
	// The scratch chunk only ever holds the row being processed.
	e.chk.Reset()

	if e.req.NumRows() != e.req.Capacity() {
		return nil
	}
	// The output chunk is full: remember where to resume and stop the scan.
	e.seekKey = kv.Key(key).PrefixNext()
	return ScanBreak
}
// NewChunk returns a fresh chunk with capacity 1024 whose columns use the
// field types of e.fields, in the same order.
func (e *analyzeColumnsExec) NewChunk() *chunk.Chunk {
	fieldTypes := make([]*types.FieldType, len(e.fields))
	for i, rf := range e.fields {
		fieldTypes[i] = &rf.Column.FieldType
	}
	return chunk.NewChunkWithCapacity(fieldTypes, 1024)
}
// Close implements the sqlexec.RecordSet Close interface.
// It does nothing and always returns nil: the scanner opened by Next is
// already closed before Next returns.
func (e *analyzeColumnsExec) Close() error {
return nil
}