init course framework

Signed-off-by: Connor <zbk602423539@gmail.com>
Co-authored-by: Nick Cameron <nrc@ncameron.org>
Co-authored-by: linning <linningde25@gmail.com>
Co-authored-by: YangKeao <keao.yang@yahoo.com>
Co-authored-by: andylokandy <andylokandy@hotmail.com>
Co-authored-by: Iosmanthus Teng <myosmanthustree@gmail.com>
This commit is contained in:
Connor 2020-04-30 15:07:27 +08:00
commit 5e089a2cd1
235 changed files with 83600 additions and 0 deletions

20
.github/workflows/go.yml vendored Normal file

@ -0,0 +1,20 @@
name: Go
on: [push,pull_request]
jobs:
build:
name: Build & Test
runs-on: ubuntu-latest
steps:
- name: Set up Go 1.13
uses: actions/setup-go@v1
with:
go-version: 1.13
id: go
- name: Check out code into the Go module directory
uses: actions/checkout@v1
- name: Build & Test
run: make ci

6
.gitignore vendored Normal file

@ -0,0 +1,6 @@
.idea*
.vscode*
node/node
unikv/unikv
bin/*
_tools

65
Makefile Normal file

@ -0,0 +1,65 @@
SHELL := /bin/bash
PROJECT=tinykv
GOPATH ?= $(shell go env GOPATH)
# Ensure GOPATH is set before running build process.
ifeq "$(GOPATH)" ""
$(error Please set the environment variable GOPATH before running `make`)
endif
GO := GO111MODULE=on go
GOBUILD := $(GO) build $(BUILD_FLAG) -tags codes
GOTEST := $(GO) test -p 8
TEST_LDFLAGS := ""
PACKAGE_LIST := go list ./...| grep -vE "cmd"
PACKAGES := $$($(PACKAGE_LIST))
# Targets
.PHONY: clean test proto kv scheduler dev
default: kv scheduler
dev: default test
test:
@echo "Running tests in native mode."
@export TZ='Asia/Shanghai'; \
$(GOTEST) -cover $(PACKAGES)
CURDIR := $(shell pwd)
export PATH := $(CURDIR)/bin/:$(PATH)
proto:
mkdir -p $(CURDIR)/bin
(cd proto && ./generate_go.sh)
GO111MODULE=on go build ./proto/pkg/...
kv:
$(GOBUILD) -o bin/tinykv-server kv/main.go
scheduler:
$(GOBUILD) -o bin/tinyscheduler-server scheduler/main.go
ci: default test
@echo "Checking formatting"
@test -z "$$(gofmt -s -l $$(find . -name '*.go' -type f -print) | tee /dev/stderr)"
@echo "Running Go vet"
@go vet ./...
format:
@gofmt -s -w `find . -name '*.go' -type f ! -path '*/_tools/*' -print`
lab1:
go test -count=1 ./kv/server -run 1
lab2: lab2a lab2b lab2c
lab2a:
go test -count=1 ./raft -run 2A
lab2b:
go test -count=1 ./kv/test_raftstore -run 2B
lab2c:
go test -count=1 ./raft ./kv/test_raftstore -run 2C

49
README.md Normal file

@ -0,0 +1,49 @@
# The TinyKV LAB
This is a series of labs on building a key-value storage system with the Raft consensus algorithm. The labs are inspired by the famous [MIT 6.824](http://nil.csail.mit.edu/6.824/2018/index.html) course, but aim to be closer to industry implementations. The whole project is pruned from [TiKV](https://github.com/tikv/tikv) and rewritten in Go. After completing this course, you will know how to implement a basic key-value storage service with distributed transactions and fault tolerance, and you will have a better understanding of the TiKV implementation.
The project initially provides skeleton code for a key-value server and a scheduler server; you need to finish the core logic step by step:
- LAB1: build a standalone key-value server
- LAB2: build a fault-tolerant key-value server with Raft
- LAB3: support multiple Raft groups and balance scheduling on top of LAB2
- LAB4: support distributed transactions on top of LAB3
**Important note: this course is still under development, and the documentation is incomplete.** Any feedback and contributions are greatly appreciated. Please see the help-wanted issues if you want to join the development.
## Build
```
make
```
## Test
```
make test
```
## Run (not runnable now)
Put the `tinyscheduler-server`, `tinykv-server`, and `tinysql-server` binaries into a single directory.
In that directory, run the following commands:
```
mkdir -p data
```
```
./tinyscheduler-server
```
```
./tinykv-server --db-path=data
```
```
./tinysql-server --store=tikv --path="127.0.0.1:2379"
```
## Documentation (incomplete)
This repo contains a single module: tinykv. Each package is documented either in a doc.go file or, for a single-file
package, in that file.
See [doc.go](doc.go) for top-level documentation.

39
go.mod Normal file

@ -0,0 +1,39 @@
module github.com/pingcap-incubator/tinykv
require (
github.com/BurntSushi/toml v0.3.1
github.com/Connor1996/badger v1.5.1-0.20200306031920-9bbcbd8ba570
github.com/coocood/badger v1.5.1-0.20191220113928-eaffd0ec7a8c // indirect
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f
github.com/docker/go-units v0.4.0
github.com/gogo/protobuf v1.2.1
github.com/golang/protobuf v1.3.2
github.com/google/btree v1.0.0
github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5
github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect
github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 // indirect
github.com/montanaflynn/stats v0.0.0-20180911141734-db72e6cae808
github.com/opentracing/opentracing-go v1.0.2
github.com/petar/GoLLRB v0.0.0-20190514000832-33fb24c13b99
github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8
github.com/pingcap/errcode v0.0.0-20180921232412-a1a7271709d9
github.com/pingcap/errors v0.11.4
github.com/pingcap/kvproto v0.0.0-20190821201150-798d27658fae
github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596
github.com/pingcap/parser v0.0.0-20190903084634-0daf3f706c76
github.com/pingcap/tidb v1.1.0-beta.0.20190904060835-0872b65ff1f9
github.com/pkg/errors v0.8.1
github.com/shirou/gopsutil v2.18.10+incompatible
github.com/sirupsen/logrus v1.2.0
github.com/stretchr/testify v1.3.0
go.etcd.io/etcd v0.0.0-20190320044326-77d4b742cdbf
go.uber.org/zap v1.9.1
golang.org/x/net v0.0.0-20190620200207-3b0461eec859
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect
google.golang.org/grpc v1.17.0
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce // indirect
gopkg.in/natefinch/lumberjack.v2 v2.0.0
gopkg.in/stretchr/testify.v1 v1.2.2 // indirect
)
go 1.13

417
go.sum Normal file

@ -0,0 +1,417 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9 h1:HD8gA2tkByhMAwYaFAX9w2l7vxvBQ5NMoxDrkhqhtn4=
github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Connor1996/badger v1.5.0 h1:GIETn+enyBZZEs6SNXCGZafzf4W9bf83Ec/Rtf7+ExU=
github.com/Connor1996/badger v1.5.0/go.mod h1:i5tVv6WOnUfrDvUwyNe70leuRyNIBfTpXX9I8CoxQjQ=
github.com/Connor1996/badger v1.5.1-0.20200220043901-cee19f7bcf4f h1:xNCYfucq8ErQ0obR9SirnxbCXtzwQkORy8KhtKuf0/Q=
github.com/Connor1996/badger v1.5.1-0.20200220043901-cee19f7bcf4f/go.mod h1:eDy3lZfjgEs4EC8pePI7y/Qx509ylx/S94y/dimtkxc=
github.com/Connor1996/badger v1.5.1-0.20200302104252-6a1b02b1cb4c h1:5BlmQypu3/umx1JZw5sE5lv1zdPMRl1NQLUYCvoRgUk=
github.com/Connor1996/badger v1.5.1-0.20200302104252-6a1b02b1cb4c/go.mod h1:eDy3lZfjgEs4EC8pePI7y/Qx509ylx/S94y/dimtkxc=
github.com/Connor1996/badger v1.5.1-0.20200306031920-9bbcbd8ba570 h1:fDz4LQTBQynUkcYDLzVQ8nz4HKoRu4zga7/MGXV/e8s=
github.com/Connor1996/badger v1.5.1-0.20200306031920-9bbcbd8ba570/go.mod h1:eDy3lZfjgEs4EC8pePI7y/Qx509ylx/S94y/dimtkxc=
github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM=
github.com/DataDog/zstd v1.4.1/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/StackExchange/wmi v0.0.0-20180725035823-b12b22c5341f h1:5ZfJxyXo8KyX8DgGXC5B7ILL8y51fci/qYz2B4j8iLY=
github.com/StackExchange/wmi v0.0.0-20180725035823-b12b22c5341f/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 h1:xJ4a3vCFaGF/jqvzLMYoU8P317H5OQ+Via4RmuPwCS0=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/blacktear23/go-proxyprotocol v0.0.0-20180807104634-af7a81e8dd0d/go.mod h1:VKt7CNAQxpFpSDz3sXyj9hY/GbVsQCr0sB3w59nE7lU=
github.com/brianvoe/gofakeit v3.18.0+incompatible/go.mod h1:kfwdRA90vvNhPutZWfH7WPaDzUjz+CZFqG+rPkOjGOc=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM=
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20171208011716-f6d7a1f6fbf3 h1:T7Bw4H6z3WAZ2khw+gfKdYmbKHyy5xiHtk9IHfZqm7g=
github.com/chzyer/readline v0.0.0-20171208011716-f6d7a1f6fbf3/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd h1:qMd81Ts1T2OTKmB4acZcyKaMtRnY5Y44NuXGX2GFJ1w=
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI=
github.com/coocood/badger v1.5.1-0.20191220113928-eaffd0ec7a8c h1:3LUmJxDuX+9aSQl0Szun1ZkfARHD/2NOgcRd2ncyOZI=
github.com/coocood/badger v1.5.1-0.20191220113928-eaffd0ec7a8c/go.mod h1:nWOxoEl8pfb73mZNB38uYvFfNOykho6REd5J6VerzjM=
github.com/coocood/bbloom v0.0.0-20190830030839-58deb6228d64 h1:W1SHiII3e0jVwvaQFglwu3kS9NLxOeTpvik7MbKCyuQ=
github.com/coocood/bbloom v0.0.0-20190830030839-58deb6228d64/go.mod h1:F86k/6c7aDUdwSUevnLpHS/3Q9hzYCE99jGk2xsHnt0=
github.com/coocood/rtutil v0.0.0-20190304133409-c84515f646f2 h1:NnLfQ77q0G4k2Of2c1ceQ0ec6MkLQyDp+IGdVM0D8XM=
github.com/coocood/rtutil v0.0.0-20190304133409-c84515f646f2/go.mod h1:7qG7YFnOALvsx6tKTNmQot8d7cGFXM9TidzvRFLWYwM=
github.com/coreos/bbolt v1.3.3 h1:n6AiVyVRKQFNb6mJlwESEvvLoDyiTzXX7ORAUlkeBdY=
github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/etcd v3.3.13+incompatible h1:8F3hqu9fGYLBifCmRCJsicFqDx/D68Rt3q1JMazcgBQ=
github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
github.com/coreos/go-semver v0.2.0 h1:3Jm3tLmsgAYcjC+4Up7hJrFBPr+n7rAqYeSw/SZazuY=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd v0.0.0-20181031085051-9002847aa142 h1:3jFq2xL4ZajGK4aZY8jz+DAF0FHjI51BXjjSwCzS1Dk=
github.com/coreos/go-systemd v0.0.0-20181031085051-9002847aa142/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 h1:iwZdTE0PVqJCos1vaoKsclOGD3ADKpshg3SRtYBbwso=
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM=
github.com/cznic/sortutil v0.0.0-20150617083342-4c7342852e65 h1:hxuZop6tSoOi0sxFzoGGYdRqNrPubyaIf9KoBG9tPiE=
github.com/cznic/sortutil v0.0.0-20150617083342-4c7342852e65/go.mod h1:q2w6Bg5jeox1B+QkJ6Wp/+Vn0G/bo3f1uY7Fn3vivIQ=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgraph-io/badger v1.6.0 h1:DshxFxZWXUcO0xX476VJC07Xsr6ZCBVRHKZ93Oh7Evo=
github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
github.com/dgraph-io/ristretto v0.0.0-20191010170704-2ba187ef9534 h1:9G6fVccQriMJu4nXwpwLDoy9y31t/KUSLAbPcoBgv+4=
github.com/dgraph-io/ristretto v0.0.0-20191010170704-2ba187ef9534/go.mod h1:edzKIzGvqUCMzhTVWbiTSe75zD9Xxq0GtSBtFmaUTZs=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-farm v0.0.0-20190104051053-3adb47b1fb0f h1:dDxpBYafY/GYpcl+LS4Bn3ziLPuEdGRkRjYAbSlWxSA=
github.com/dgryski/go-farm v0.0.0-20190104051053-3adb47b1fb0f/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o=
github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 h1:Mn26/9ZMNWSw9C9ERFA1PUxfmGpolnw2v0bKOREu5ew=
github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I=
github.com/go-ole/go-ole v1.2.1 h1:2lOsA72HgjxAuMlKpFiCbHTvu44PIVkZ5hqm3RSdI/E=
github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8=
github.com/go-playground/overalls v0.0.0-20180201144345-22ec1a223b7c/go.mod h1:UqxAgEOt89sCiXlrc/ycnx00LVvUO/eS8tMUkWX4R7w=
github.com/go-sql-driver/mysql v0.0.0-20170715192408-3955978caca4/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.0.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.2.0 h1:xU6/SpYbvkNYiptHJYEDRseDLvYE7wSqhYYNy0QSUzI=
github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE=
github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20181024230925-c65c006176ff h1:kOkM9whyQYodu09SJ6W3NCsHG7crFaJILQ22Gozp3lg=
github.com/golang/groupcache v0.0.0-20181024230925-c65c006176ff/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c h1:964Od4U6p2jUkFxvCydnIczKteheJEzHRToSGK3Bnlw=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/shlex v0.0.0-20181106134648-c34317bd91bf/go.mod h1:RpwtwJQFrIEPstU94h88MWPXP2ektJZ8cZ0YntAmXiE=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
github.com/gorilla/mux v1.6.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/mux v1.6.2 h1:Pgr17XVTNXAk3q/r4CpKzC5xBM/qW1uVLV+IhRZpIIk=
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/gorilla/websocket v1.2.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q=
github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 h1:z53tR0945TRRQO/fLEVPI6SMv7ZflF0TEaTAoU7tOzg=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
github.com/grpc-ecosystem/grpc-gateway v1.4.1/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
github.com/grpc-ecosystem/grpc-gateway v1.5.1 h1:3scN4iuXkNOyP98jF55Lv8a9j1o/IwvnDIZ0LHJK1nk=
github.com/grpc-ecosystem/grpc-gateway v1.5.1/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
github.com/json-iterator/go v1.1.6 h1:MrUvLMLTMxbqFJ9kzlvat/rYZqZnW3u4wkLzWTaFwKs=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5 h1:rhqTjzJlm7EbkELJDKMTU7udov+Se0xZkWmugr6zGok=
github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q=
github.com/juju/loggo v0.0.0-20180524022052-584905176618 h1:MK144iBQF9hTSwBW/9eJm034bVoG30IshVm688T2hi8=
github.com/juju/loggo v0.0.0-20180524022052-584905176618/go.mod h1:vgyd7OREkbtVEN/8IXZe5Ooef3LQePvuBm9UWj6ZL8U=
github.com/juju/ratelimit v1.0.1 h1:+7AIFJVQ0EQgq/K9+0Krm7m530Du7tIz0METWzN0RgY=
github.com/juju/ratelimit v1.0.1/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk=
github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 h1:WQM1NildKThwdP7qWrNAFGzp4ijNLw8RlgENkaI4MJs=
github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073/go.mod h1:63prj8cnj0tU0S9OHjGJn+b1h0ZghCndfnbQolrYTwA=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5 h1:2U0HzY8BJ8hVwDKIzp7y4voR9CX/nvcfymLmg2UiOio=
github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.1 h1:vJi+O/nMdFt0vqm8NZBI6wzALWdA2X+egi0ogNyrC/w=
github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.0.0/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-shellwords v1.0.3 h1:K/VxK7SZ+cvuPgFSLKi5QPI9Vr/ipOf4C1gN+ntueUk=
github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
github.com/matttproud/golang_protobuf_extensions v1.0.0/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/montanaflynn/stats v0.0.0-20151014174947-eeaced052adb/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/montanaflynn/stats v0.0.0-20180911141734-db72e6cae808 h1:pmpDGKLw4n82EtrNiLqB+xSz/JQwFOaZuMALYUHwX5s=
github.com/montanaflynn/stats v0.0.0-20180911141734-db72e6cae808/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/myesui/uuid v1.0.0 h1:xCBmH4l5KuvLYc5L7AS7SZg9/jKdIFubM7OVoLqaQUI=
github.com/myesui/uuid v1.0.0/go.mod h1:2CDfNgU0LR8mIdO8vdWd8i9gWWxLlcoIGGpSNgafq84=
github.com/ncw/directio v1.0.4 h1:CojwI07mCEmRkajgx42Pf8jyCwTs1ji9/Ij9/PJG12k=
github.com/ncw/directio v1.0.4/go.mod h1:CKGdcN7StAaqjT7Qack3lAXeX4pjnyc46YeqZH1yWVY=
github.com/ngaut/log v0.0.0-20180314031856-b8e36e7ba5ac h1:wyheT2lPXRQqYPWY2IVW5BTLrbqCsnhL61zK2R5goLA=
github.com/ngaut/log v0.0.0-20180314031856-b8e36e7ba5ac/go.mod h1:ueVCjKQllPmX7uEvCYnZD5b8qjidGf1TCH61arVe4SU=
github.com/ngaut/pools v0.0.0-20180318154953-b7bc8c42aac7 h1:7KAv7KMGTTqSmYZtNdcNTgsos+vFzULLwyElndwn+5c=
github.com/ngaut/pools v0.0.0-20180318154953-b7bc8c42aac7/go.mod h1:iWMfgwqYW+e8n5lC/jjNEhwcjbRDpl5NT7n2h+4UNcI=
github.com/ngaut/sync2 v0.0.0-20141008032647-7a24ed77b2ef h1:K0Fn+DoFqNqktdZtdV3bPQ/0cuYh2H4rkg0tytX/07k=
github.com/ngaut/sync2 v0.0.0-20141008032647-7a24ed77b2ef/go.mod h1:7WjlapSfwQyo6LNmIvEWzsW1hbBQfpUO4JWnuQRmva8=
github.com/nicksnyder/go-i18n v1.10.0/go.mod h1:HrK7VCrbOvQoUAQ7Vpy7i87N7JZZZ7R2xBGjv0j365Q=
github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo=
github.com/onsi/ginkgo v1.6.0 h1:Ix8l273rp3QzYgXSR+c8d1fTG7UPgYkOSELPhiY/YGw=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v1.4.2 h1:3mYCb7aPxS/RU7TI1y4rkEn1oKmPRjNJLNEXgw7MH2I=
github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/opentracing/basictracer-go v1.0.0 h1:YyUAhaEfjoWXclZVJ9sGoNct7j4TVk7lZWlQw5UXuoo=
github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74=
github.com/opentracing/opentracing-go v1.0.2 h1:3jA2P6O1F9UOrWVpwrIo17pu01KWvNWg4X946/Y5Zwg=
github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pelletier/go-toml v1.3.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo=
github.com/petar/GoLLRB v0.0.0-20190514000832-33fb24c13b99 h1:KcEvVBAvyHkUdFAygKAzwB6LAcZ6LS32WHmRD2VyXMI=
github.com/petar/GoLLRB v0.0.0-20190514000832-33fb24c13b99/go.mod h1:HUpKUBZnpzkdx0kD/+Yfuft+uD3zHGtXF/XJB14TUr4=
github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8 h1:USx2/E1bX46VG32FIw034Au6seQ2fY9NEILmNh/UlQg=
github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ=
github.com/pingcap/errcode v0.0.0-20180921232412-a1a7271709d9 h1:KH4f4Si9XK6/IW50HtoaiLIFHGkapOM6w83za47UYik=
github.com/pingcap/errcode v0.0.0-20180921232412-a1a7271709d9/go.mod h1:4b2X8xSqxIroj/IZ9MX/VGZhAwc11wB9wRIzHvz6SeM=
github.com/pingcap/errors v0.10.1/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
github.com/pingcap/errors v0.11.0 h1:DCJQB8jrHbQ1VVlMFIrbj2ApScNNotVmkSNplu2yUt4=
github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
github.com/pingcap/failpoint v0.0.0-20190512135322-30cc7431d99c h1:hvQd3aOLKLF7xvRV6DzvPkKY4QXzfVbjU1BhW0d9yL8=
github.com/pingcap/failpoint v0.0.0-20190512135322-30cc7431d99c/go.mod h1:DNS3Qg7bEDhU6EXNHF+XSv/PGznQaMJ5FWvctpm6pQI=
github.com/pingcap/goleveldb v0.0.0-20171020122428-b9ff6c35079e h1:P73/4dPCL96rGrobssy1nVy2VaVpNCuLpCbr+FEaTA8=
github.com/pingcap/goleveldb v0.0.0-20171020122428-b9ff6c35079e/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
github.com/pingcap/kvproto v0.0.0-20190516013202-4cf58ad90b6c/go.mod h1:QMdbTAXCHzzygQzqcG9uVUgU2fKeSN1GmfMiykdSzzY=
github.com/pingcap/kvproto v0.0.0-20190821201150-798d27658fae h1:WR4d5ga8zXT+QDWYFzzyA+PJMMszR0kQxyYMh6dvHPg=
github.com/pingcap/kvproto v0.0.0-20190821201150-798d27658fae/go.mod h1:QMdbTAXCHzzygQzqcG9uVUgU2fKeSN1GmfMiykdSzzY=
github.com/pingcap/log v0.0.0-20190214045112-b37da76f67a7/go.mod h1:xsfkWVaFVV5B8e1K9seWfyJWFrIhbtUTAD8NV1Pq3+w=
github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 h1:t2OQTpPJnrPDGlvA+3FwJptMTt6MEPdzK1Wt99oaefQ=
github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw=
github.com/pingcap/parser v0.0.0-20190903084634-0daf3f706c76 h1:q8d5NIRT/Urmb5woYWhlrMER8nDV33tjyvJMqODI2Rk=
github.com/pingcap/parser v0.0.0-20190903084634-0daf3f706c76/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA=
github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b/go.mod h1:3DlDlFT7EF64A1bmb/tulZb6wbPSagm5G4p1AlhaEDs=
github.com/pingcap/tidb v1.1.0-beta.0.20190904060835-0872b65ff1f9 h1:Fg4wHf1wd50v0RR+GPIIGrcnCVI/LSXKUYoB5ON1t6k=
github.com/pingcap/tidb v1.1.0-beta.0.20190904060835-0872b65ff1f9/go.mod h1:vLe4ZQRrNZ98B0W6BMZJ2MFlGuLNhMO0gYLL7o7QHiE=
github.com/pingcap/tidb-tools v2.1.3-0.20190321065848-1e8b48f5c168+incompatible h1:MkWCxgZpJBgY2f4HtwWMMFzSBb3+JPzeJgF3VrXE/bU=
github.com/pingcap/tidb-tools v2.1.3-0.20190321065848-1e8b48f5c168+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM=
github.com/pingcap/tipb v0.0.0-20190806070524-16909e03435e h1:H7meq8QPmWGImOkHTQYAWw82zwIqndJaCDPVUknOHbM=
github.com/pingcap/tipb v0.0.0-20190806070524-16909e03435e/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v0.9.0 h1:tXuTFVHC03mW0D+Ua1Q2d1EAVqLTuggX50V0VLICCzY=
github.com/prometheus/client_golang v0.9.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_model v0.0.0-20170216185247-6f3806018612/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910 h1:idejC8f05m9MGOsuEi1ATq9shN03HrxNkD/luQvxCv8=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/common v0.0.0-20180518154759-7600349dcfe1/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/common v0.0.0-20181020173914-7e9e6cabbd39 h1:Cto4X6SVMWRPBkJ/3YHn1iDGDGc/Z+sW+AEMKHMVvN4=
github.com/prometheus/common v0.0.0-20181020173914-7e9e6cabbd39/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/procfs v0.0.0-20180612222113-7d6f385de8be/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d h1:GoAlyOgbOEIFdaDqxJVlbOQ1DtGmZWs/Qau0hIlk+WQ=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/remyoudompheng/bigfft v0.0.0-20190512091148-babf20351dd7 h1:FUL3b97ZY2EPqg2NbXKuMHs5pXJB9hjj1fDHnF2vl28=
github.com/remyoudompheng/bigfft v0.0.0-20190512091148-babf20351dd7/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/shirou/gopsutil v2.18.10+incompatible h1:cy84jW6EVRPa5g9HAHrlbxMSIjBhDSX0OFYyMYminYs=
github.com/shirou/gopsutil v2.18.10+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/shurcooL/httpfs v0.0.0-20171119174359-809beceb2371/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg=
github.com/shurcooL/vfsgen v0.0.0-20181020040650-a97a25d856ca/go.mod h1:TrYk7fJVaAttu97ZZKrO9UbRa8izdowaMIZcxYMbVaw=
github.com/sirupsen/logrus v1.0.5/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/struCoder/pidusage v0.1.2/go.mod h1:pWBlW3YuSwRl6h7R5KbvA4N8oOqe9LjaKW5CwT1SPjI=
github.com/syndtr/goleveldb v0.0.0-20180815032940-ae2bd5eed72d h1:4J9HCZVpvDmj2tiKGSTUnb3Ok/9CEQb9oqu9LHKQQpc=
github.com/syndtr/goleveldb v0.0.0-20180815032940-ae2bd5eed72d/go.mod h1:Z4AUp2Km+PwemOoO/VB5AOx9XSsIItzFjoJlOSiYmn0=
github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2/go.mod h1:2PfKggNGDuadAa0LElHrByyrz4JPZ9fFx6Gs7nx7ZZU=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tmc/grpc-websocket-proxy v0.0.0-20171017195756-830351dc03c6 h1:lYIiVDtZnyTWlNwiAxLj0bbpTcx1BWCFhXjfsvmPdNc=
github.com/tmc/grpc-websocket-proxy v0.0.0-20171017195756-830351dc03c6/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/twinj/uuid v1.0.0 h1:fzz7COZnDrXGTAOHGuUGYd6sG+JMq+AoE7+Jlu0przk=
github.com/twinj/uuid v1.0.0/go.mod h1:mMgcE1RHFUFqe5AfiwlINXisXfDGro23fWdPUfOMjRY=
github.com/uber-go/atomic v1.3.2 h1:Azu9lPBWRNKzYXSIwRfgRuDuS0YKsK4NFhiQv98gkxo=
github.com/uber-go/atomic v1.3.2/go.mod h1:/Ct5t2lcmbJ4OSe/waGBoaVvVqtO0bmtfVNex1PFV8g=
github.com/uber/jaeger-client-go v2.15.0+incompatible h1:NP3qsSqNxh8VYr956ur1N/1C1PjvOJnJykCzcD5QHbk=
github.com/uber/jaeger-client-go v2.15.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
github.com/uber/jaeger-lib v1.5.0 h1:OHbgr8l656Ub3Fw5k9SWnBfIEwvoHQ+W2y+Aa9D1Uyo=
github.com/uber/jaeger-lib v1.5.0/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U=
github.com/ugorji/go v1.1.2 h1:JON3E2/GPW2iDNGoSAusl1KDf5TRQ8k8q7Tp097pZGs=
github.com/ugorji/go v1.1.2/go.mod h1:hnLbHMwcvSihnDhEfx2/BzKp2xb0Y+ErdfYcrs9tkJQ=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8 h1:3SVOIvH7Ae1KRYyQWRjXWJEA9sS/c/pjvH++55Gr648=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/ugorji/go/codec v0.0.0-20190204201341-e444a5086c43 h1:BasDe+IErOQKrMVXab7UayvSlIpiyGwRvuX3EKYY7UA=
github.com/ugorji/go/codec v0.0.0-20190204201341-e444a5086c43/go.mod h1:iT03XoTwV7xq/+UGwKO3UbC1nNNlopQiY61beSdrtOA=
github.com/unrolled/render v0.0.0-20171102162132-65450fb6b2d3/go.mod h1:tu82oB5W2ykJRVioYsB+IQKcft7ryBr7w12qMBUPyXg=
github.com/unrolled/render v0.0.0-20180914162206-b9786414de4d h1:ggUgChAeyge4NZ4QUw6lhHsVymzwSDJOZcE0s2X8S20=
github.com/unrolled/render v0.0.0-20180914162206-b9786414de4d/go.mod h1:tu82oB5W2ykJRVioYsB+IQKcft7ryBr7w12qMBUPyXg=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/negroni v0.3.0 h1:PaXOb61mWeZJxc1Ji2xJjpVg9QfPo0rrB+lHyBxGNSU=
github.com/urfave/negroni v0.3.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
github.com/yookoala/realpath v1.0.0/go.mod h1:gJJMA9wuX7AcqLy1+ffPatSCySA1FQ2S8Ya9AIoYBpE=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.3 h1:MUGmc65QhB3pIlaQ5bB4LwqSj6GIonVJXpZiaKNyaKk=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/etcd v0.0.0-20190320044326-77d4b742cdbf h1:rmttwKPEgG/l4UscTDYtaJgeUsedKPKSyFfNQLI6q+I=
go.etcd.io/etcd v0.0.0-20190320044326-77d4b742cdbf/go.mod h1:KSGwdbiFchh5KIC9My2+ZVl5/3ANcwohw50dpPwa2cw=
go.uber.org/atomic v1.3.2 h1:2Oa65PReHzfn29GpvgsYwloV9AVFHPDk8tYxt2c2tr4=
go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/zap v1.9.1 h1:XCJQEf3W6eZaVwhRBof6ImoYGJSITeKWsyeh3HFu/5o=
go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
golang.org/x/crypto v0.0.0-20180608092829-8ac0e0d97ce4/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793 h1:u+LnwYTOOW7Ukr/fppxEb1Nwz0AtPflrblfvUudpo+I=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181005035420-146acd28ed58/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181106065722-10aee1819953/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e h1:bRhVy7zSSasaqNksaRZiA5EEI+Ei4I1nO5Jh72wfHlg=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190109145017-48ac38b7c8cb/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb h1:fgwFCsaw9buMuxNd6+DQfAuSFqbNiQZpcgJQAgJsK6k=
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c h1:fqgJT0MGcGpPgpWU7VRdRjuArfcOvC4AoJmILihzhDg=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52 h1:JG/0uqcGdTNgq7FdU+61l5Pdmb8putNZlXb65bJBROs=
golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190130214255-bb1329dc71a0 h1:iRpjPej1fPzmfoBhMFkp3HdqzF+ytPmAwiQhJGV0zGw=
golang.org/x/tools v0.0.0-20190130214255-bb1329dc71a0/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/genproto v0.0.0-20180608181217-32ee49c4dd80/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190108161440-ae2f86662275 h1:9oFlwfEGIvmxXTcY53ygNyxIQtWciRHjrnUvZJCYXYU=
google.golang.org/genproto v0.0.0-20190108161440-ae2f86662275/go.mod h1:7Ep/1NZk928CDR8SjdVbjWNpdIf6nzjE3BTgJDr2Atg=
google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
google.golang.org/grpc v1.16.0 h1:dz5IJGuC2BB7qXR5AyHNwAUBhZscK2xVez7mznh72sY=
google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio=
google.golang.org/grpc v1.17.0 h1:TRJYBgMclJvGYn2rIMjj+h9KtMt5r1Ij7ODVRIZkwhk=
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U=
gopkg.in/alecthomas/gometalinter.v2 v2.0.12/go.mod h1:NDRytsqEZyolNuAgTzJkZMkSQM7FIKyzVzGhjB/qfYo=
gopkg.in/alecthomas/kingpin.v3-unstable v3.0.0-20180810215634-df19058c872c/go.mod h1:3HH7i1SgMqlzxCcBmUHW657sD4Kvv9sC3HpL3YukzwA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo=
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce h1:xcEWjVhvbDy+nHP67nPDDpbYrY+ILlfndk4bRioVHaU=
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8=
gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
gopkg.in/stretchr/testify.v1 v1.2.2 h1:yhQC6Uy5CqibAIlk1wlusa/MJ3iAN49/BsR/dCCKz3M=
gopkg.in/stretchr/testify.v1 v1.2.2/go.mod h1:QI5V/q6UbPmuhtm10CaFZxED9NreB8PnFYN9JcR6TxU=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
sourcegraph.com/sourcegraph/appdash v0.0.0-20180531100431-4c381bd170b4 h1:VO9oZbbkvTwqLimlQt15QNdOOBArT2dw/bvzsMZBiqQ=
sourcegraph.com/sourcegraph/appdash v0.0.0-20180531100431-4c381bd170b4/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU=
sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67/go.mod h1:L5q+DGLGOQFpo1snNEkLOJT2d1YTW66rWNzatr3He1k=

99
kv/config/config.go Normal file

@ -0,0 +1,99 @@
package config
import (
"fmt"
"time"
"github.com/pingcap-incubator/tinykv/log"
)
type Config struct {
StoreAddr string
Raft bool
SchedulerAddr string
LogLevel string
DBPath string // Directory to store the data in. Should exist and be writable.
// raft_base_tick_interval is a base tick interval (ms).
RaftBaseTickInterval time.Duration
RaftHeartbeatTicks int
RaftElectionTimeoutTicks int
// Interval (ms) at which to GC unnecessary Raft log entries.
RaftLogGCTickInterval time.Duration
// When the entry count exceeds this value, GC is forcibly triggered.
RaftLogGcCountLimit uint64
// Interval (ms) at which to check whether a region needs to be split.
SplitRegionCheckTickInterval time.Duration
// delay time before deleting a stale peer
SchedulerHeartbeatTickInterval time.Duration
SchedulerStoreHeartbeatTickInterval time.Duration
// When the size of region [a,e) reaches regionMaxSize, it will be split into
// several regions [a,b), [b,c), [c,d), [d,e). And the size of [a,b),
// [b,c), [c,d) will be regionSplitSize (maybe a little larger).
RegionMaxSize uint64
RegionSplitSize uint64
}
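// Validate checks that the Raft tick settings are usable and warns about
// settings that may cause inconsistency across the cluster.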
func (c *Config) Validate() error {
if c.RaftHeartbeatTicks == 0 {
return fmt.Errorf("heartbeat tick must greater than 0")
}
if c.RaftElectionTimeoutTicks != 10 {
log.Warnf("Election timeout ticks needs to be same across all the cluster, " +
"otherwise it may lead to inconsistency.")
}
if c.RaftElectionTimeoutTicks <= c.RaftHeartbeatTicks {
return fmt.Errorf("election tick must be greater than heartbeat tick.")
}
return nil
}
const (
KB uint64 = 1024
MB uint64 = 1024 * 1024
)
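// NewDefaultConfig returns the default configuration used when running a server.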
func NewDefaultConfig() *Config {
return &Config{
SchedulerAddr: "127.0.0.1:2379",
StoreAddr: "127.0.0.1:20160",
LogLevel: "info",
RaftBaseTickInterval: 1 * time.Second,
RaftHeartbeatTicks: 2,
RaftElectionTimeoutTicks: 10,
RaftLogGCTickInterval: 10 * time.Second,
// Assume the average size of entries is 1k.
RaftLogGcCountLimit: 128000,
SplitRegionCheckTickInterval: 10 * time.Second,
SchedulerHeartbeatTickInterval: 100 * time.Millisecond,
SchedulerStoreHeartbeatTickInterval: 10 * time.Second,
RegionMaxSize: 144 * MB,
RegionSplitSize: 96 * MB,
DBPath: "/tmp/badger",
}
}
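// NewTestConfig returns a configuration with much shorter intervals, intended for tests.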
func NewTestConfig() *Config {
return &Config{
LogLevel: "info",
RaftBaseTickInterval: 10 * time.Millisecond,
RaftHeartbeatTicks: 2,
RaftElectionTimeoutTicks: 10,
RaftLogGCTickInterval: 50 * time.Millisecond,
// Assume the average size of entries is 1k.
RaftLogGcCountLimit: 128000,
SplitRegionCheckTickInterval: 100 * time.Millisecond,
SchedulerHeartbeatTickInterval: 100 * time.Millisecond,
SchedulerStoreHeartbeatTickInterval: 500 * time.Millisecond,
RegionMaxSize: 144 * MB,
RegionSplitSize: 96 * MB,
DBPath: "/tmp/badger",
}
}
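As a rough usage sketch (not part of this commit), a server binary might build and check a configuration like this; the import path follows the module path in go.mod, and the overridden values are only illustrative:
```go
package main

import (
	"log"

	"github.com/pingcap-incubator/tinykv/kv/config"
)

func main() {
	// Start from the defaults and override what differs for this deployment.
	cfg := config.NewDefaultConfig()
	cfg.StoreAddr = "127.0.0.1:20161" // hypothetical address
	cfg.DBPath = "/tmp/badger-demo"   // hypothetical data directory

	// Validate rejects impossible Raft tick settings before the server starts.
	if err := cfg.Validate(); err != nil {
		log.Fatalf("invalid config: %v", err)
	}
	log.Printf("election timeout: %d ticks of %s",
		cfg.RaftElectionTimeoutTicks, cfg.RaftBaseTickInterval)
}
```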


@ -0,0 +1,231 @@
package rowcodec
import (
"encoding/binary"
"fmt"
"reflect"
"strings"
"unsafe"
"github.com/juju/errors"
)
// CodecVer is the constant number that represents the new row format.
const CodecVer = 128
var invalidCodecVer = errors.New("invalid codec version")
// The first byte in an encoded value specifies the encoding type.
const (
NilFlag byte = 0
BytesFlag byte = 1
CompactBytesFlag byte = 2
IntFlag byte = 3
UintFlag byte = 4
VarintFlag byte = 8
VaruintFlag byte = 9
)
// row is the struct type used to access an encoded row.
type row struct {
// small: colID []byte, offsets []uint16, optimized for most cases.
// large: colID []uint32, offsets []uint32.
large bool
numNotNullCols uint16
numNullCols uint16
colIDs []byte
// valFlags is used for converting new row format to old row format.
// It can be removed once TiDB implements the new row format.
valFlags []byte
offsets []uint16
data []byte
// for large row
colIDs32 []uint32
offsets32 []uint32
}
// String implements the fmt.Stringer interface.
func (r row) String() string {
var colValStrs []string
for i := 0; i < int(r.numNotNullCols); i++ {
var colID, offStart, offEnd int64
if r.large {
colID = int64(r.colIDs32[i])
if i != 0 {
offStart = int64(r.offsets32[i-1])
}
offEnd = int64(r.offsets32[i])
} else {
colID = int64(r.colIDs[i])
if i != 0 {
offStart = int64(r.offsets[i-1])
}
offEnd = int64(r.offsets[i])
}
colValData := r.data[offStart:offEnd]
valFlag := r.valFlags[i]
var colValStr string
if valFlag == BytesFlag {
colValStr = fmt.Sprintf("(%d:'%s')", colID, colValData)
} else {
colValStr = fmt.Sprintf("(%d:%d)", colID, colValData)
}
colValStrs = append(colValStrs, colValStr)
}
return strings.Join(colValStrs, ",")
}
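// getData returns the value bytes of the i-th non-null column.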
func (r *row) getData(i int) []byte {
var start, end uint32
if r.large {
if i > 0 {
start = r.offsets32[i-1]
}
end = r.offsets32[i]
} else {
if i > 0 {
start = uint32(r.offsets[i-1])
}
end = uint32(r.offsets[i])
}
return r.data[start:end]
}
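// setRowData parses the encoded row in rowData and points the row's fields
// into it without copying.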
func (r *row) setRowData(rowData []byte) error {
if rowData[0] != CodecVer {
return invalidCodecVer
}
r.large = rowData[1]&1 > 0
r.numNotNullCols = binary.LittleEndian.Uint16(rowData[2:])
r.numNullCols = binary.LittleEndian.Uint16(rowData[4:])
cursor := 6
r.valFlags = rowData[cursor : cursor+int(r.numNotNullCols)]
cursor += int(r.numNotNullCols)
if r.large {
colIDsLen := int(r.numNotNullCols+r.numNullCols) * 4
r.colIDs32 = bytesToU32Slice(rowData[cursor : cursor+colIDsLen])
cursor += colIDsLen
offsetsLen := int(r.numNotNullCols) * 4
r.offsets32 = bytesToU32Slice(rowData[cursor : cursor+offsetsLen])
cursor += offsetsLen
} else {
colIDsLen := int(r.numNotNullCols + r.numNullCols)
r.colIDs = rowData[cursor : cursor+colIDsLen]
cursor += colIDsLen
offsetsLen := int(r.numNotNullCols) * 2
r.offsets = bytes2U16Slice(rowData[cursor : cursor+offsetsLen])
cursor += offsetsLen
}
r.data = rowData[cursor:]
return nil
}
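// bytesToU32Slice reinterprets b as a []uint32 without copying; the result
// aliases b's memory, so b must stay alive while the result is in use.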
func bytesToU32Slice(b []byte) []uint32 {
if len(b) == 0 {
return nil
}
var u32s []uint32
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&u32s))
hdr.Len = len(b) / 4
hdr.Cap = hdr.Len
hdr.Data = uintptr(unsafe.Pointer(&b[0]))
return u32s
}
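// bytes2U16Slice reinterprets b as a []uint16 without copying; the same
// aliasing caveats as bytesToU32Slice apply.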
func bytes2U16Slice(b []byte) []uint16 {
if len(b) == 0 {
return nil
}
var u16s []uint16
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&u16s))
hdr.Len = len(b) / 2
hdr.Cap = hdr.Len
hdr.Data = uintptr(unsafe.Pointer(&b[0]))
return u16s
}
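// u16SliceToBytes exposes the bytes backing u16s without copying.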
func u16SliceToBytes(u16s []uint16) []byte {
if len(u16s) == 0 {
return nil
}
var b []byte
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
hdr.Len = len(u16s) * 2
hdr.Cap = hdr.Len
hdr.Data = uintptr(unsafe.Pointer(&u16s[0]))
return b
}
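// u32SliceToBytes exposes the bytes backing u32s without copying.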
func u32SliceToBytes(u32s []uint32) []byte {
if len(u32s) == 0 {
return nil
}
var b []byte
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
hdr.Len = len(u32s) * 4
hdr.Cap = hdr.Len
hdr.Data = uintptr(unsafe.Pointer(&u32s[0]))
return b
}
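// encodeInt appends iVal to buf using the smallest little-endian width
// (1, 2, 4, or 8 bytes) that preserves its value.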
func encodeInt(buf []byte, iVal int64) []byte {
var tmp [8]byte
if int64(int8(iVal)) == iVal {
buf = append(buf, byte(iVal))
} else if int64(int16(iVal)) == iVal {
binary.LittleEndian.PutUint16(tmp[:], uint16(iVal))
buf = append(buf, tmp[:2]...)
} else if int64(int32(iVal)) == iVal {
binary.LittleEndian.PutUint32(tmp[:], uint32(iVal))
buf = append(buf, tmp[:4]...)
} else {
binary.LittleEndian.PutUint64(tmp[:], uint64(iVal))
buf = append(buf, tmp[:8]...)
}
return buf
}
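// decodeInt decodes an integer written by encodeInt, inferring its width from len(val).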
func decodeInt(val []byte) int64 {
switch len(val) {
case 1:
return int64(int8(val[0]))
case 2:
return int64(int16(binary.LittleEndian.Uint16(val)))
case 4:
return int64(int32(binary.LittleEndian.Uint32(val)))
default:
return int64(binary.LittleEndian.Uint64(val))
}
}
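// encodeUint appends uVal to buf using the smallest little-endian width
// (1, 2, 4, or 8 bytes) that preserves its value.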
func encodeUint(buf []byte, uVal uint64) []byte {
var tmp [8]byte
if uint64(uint8(uVal)) == uVal {
buf = append(buf, byte(uVal))
} else if uint64(uint16(uVal)) == uVal {
binary.LittleEndian.PutUint16(tmp[:], uint16(uVal))
buf = append(buf, tmp[:2]...)
} else if uint64(uint32(uVal)) == uVal {
binary.LittleEndian.PutUint32(tmp[:], uint32(uVal))
buf = append(buf, tmp[:4]...)
} else {
binary.LittleEndian.PutUint64(tmp[:], uint64(uVal))
buf = append(buf, tmp[:8]...)
}
return buf
}
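// decodeUint decodes an unsigned integer written by encodeUint, inferring its width from len(val).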
func decodeUint(val []byte) uint64 {
switch len(val) {
case 1:
return uint64(val[0])
case 2:
return uint64(binary.LittleEndian.Uint16(val))
case 4:
return uint64(binary.LittleEndian.Uint32(val))
default:
return binary.LittleEndian.Uint64(val)
}
}
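To make the encoding above concrete, here is a hypothetical in-package sketch (it uses the unexported row, setRowData, getData, encodeInt, and decodeInt, so it would have to live in this package) that hand-builds a one-column row in the small layout and reads the value back:
```go
package rowcodec

import "fmt"

// buildAndReadSmallRow is a hypothetical sketch of the small (non-large)
// layout that setRowData expects: version, flags, column counts, value
// flags, column IDs, offsets, then the data section.
func buildAndReadSmallRow() {
	var buf []byte
	buf = append(buf, CodecVer)              // byte 0: codec version (128)
	buf = append(buf, 0)                     // byte 1: flags, bit 0 clear => small format
	buf = append(buf, 1, 0)                  // bytes 2-3: numNotNullCols = 1 (little endian)
	buf = append(buf, 0, 0)                  // bytes 4-5: numNullCols = 0
	buf = append(buf, IntFlag)               // value flag of the single column
	buf = append(buf, 1)                     // column ID 1 (one byte in the small format)
	buf = append(buf, 1, 0)                  // uint16 offset: the column's data ends at byte 1
	buf = append(buf, encodeInt(nil, 42)...) // data section: 42 fits in a single byte

	var r row
	if err := r.setRowData(buf); err != nil {
		panic(err)
	}
	fmt.Println(decodeInt(r.getData(0))) // prints 42
}
```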


@ -0,0 +1,254 @@
package rowcodec
import (
"math"
"time"
"github.com/juju/errors"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/types/json"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
)
// Decoder decodes the row to chunk.Chunk.
type Decoder struct {
row
requestColIDs []int64
handleColID int64
requestTypes []*types.FieldType
origDefaults [][]byte
loc *time.Location
}
// NewDecoder creates a Decoder.
// requestColIDs holds the column IDs to decode; tps holds the field types of the requested columns.
// origDefaults holds the original default values in the old format; if a column ID is not found
// in the row, its original default value is used.
func NewDecoder(requestColIDs []int64, handleColID int64, tps []*types.FieldType, origDefaults [][]byte,
loc *time.Location) (*Decoder, error) {
xOrigDefaultVals := make([][]byte, len(origDefaults))
for i := 0; i < len(origDefaults); i++ {
if len(origDefaults[i]) == 0 {
continue
}
xDefaultVal, err := convertDefaultValue(origDefaults[i])
if err != nil {
return nil, err
}
xOrigDefaultVals[i] = xDefaultVal
}
return &Decoder{
requestColIDs: requestColIDs,
handleColID: handleColID,
requestTypes: tps,
origDefaults: xOrigDefaultVals,
loc: loc,
}, nil
}
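// convertDefaultValue decodes an old-format default value datum and re-encodes
// it into the value encoding used by the new row format (nil for null values).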
func convertDefaultValue(defaultVal []byte) (colVal []byte, err error) {
var d types.Datum
_, d, err = codec.DecodeOne(defaultVal)
if err != nil {
return
}
switch d.Kind() {
case types.KindNull:
return nil, nil
case types.KindInt64:
return encodeInt(nil, d.GetInt64()), nil
case types.KindUint64:
return encodeUint(nil, d.GetUint64()), nil
case types.KindString, types.KindBytes:
return d.GetBytes(), nil
case types.KindFloat32:
return encodeUint(nil, uint64(math.Float32bits(d.GetFloat32()))), nil
case types.KindFloat64:
return encodeUint(nil, math.Float64bits(d.GetFloat64())), nil
default:
return defaultVal[1:], nil
}
}
// Decode decodes a row into the given chunk.Chunk.
func (decoder *Decoder) Decode(rowData []byte, handle int64, chk *chunk.Chunk) error {
err := decoder.setRowData(rowData)
if err != nil {
return err
}
for colIdx, colID := range decoder.requestColIDs {
if colID == decoder.handleColID {
chk.AppendInt64(colIdx, handle)
continue
}
// Search the column in not-null columns array.
i, j := 0, int(decoder.numNotNullCols)
var found bool
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
var v int64
if decoder.large {
v = int64(decoder.colIDs32[h])
} else {
v = int64(decoder.colIDs[h])
}
if v < colID {
i = h + 1
} else if v > colID {
j = h
} else {
found = true
colData := decoder.getData(h)
err := decoder.decodeColData(colIdx, colData, chk)
if err != nil {
return err
}
break
}
}
if found {
continue
}
defaultVal := decoder.origDefaults[colIdx]
if decoder.isNull(colID, defaultVal) {
chk.AppendNull(colIdx)
} else {
err := decoder.decodeColData(colIdx, defaultVal, chk)
if err != nil {
return err
}
}
}
return nil
}
// ColumnIsNull returns whether the column value is null. It is mainly used for count column aggregation.
func (decoder *Decoder) ColumnIsNull(rowData []byte, colID int64, defaultVal []byte) (bool, error) {
err := decoder.setRowData(rowData)
if err != nil {
return false, err
}
// Search the column in not-null columns array.
i, j := 0, int(decoder.numNotNullCols)
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
var v int64
if decoder.large {
v = int64(decoder.colIDs32[h])
} else {
v = int64(decoder.colIDs[h])
}
if v < colID {
i = h + 1
} else if v > colID {
j = h
} else {
return false, nil
}
}
return decoder.isNull(colID, defaultVal), nil
}
func (decoder *Decoder) isNull(colID int64, defaultVal []byte) bool {
// Search the column in null columns array.
i, j := int(decoder.numNotNullCols), int(decoder.numNotNullCols+decoder.numNullCols)
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
var v int64
if decoder.large {
v = int64(decoder.colIDs32[h])
} else {
v = int64(decoder.colIDs[h])
}
if v < colID {
i = h + 1
} else if v > colID {
j = h
} else {
return true
}
}
return defaultVal == nil
}
func (decoder *Decoder) decodeColData(colIdx int, colData []byte, chk *chunk.Chunk) error {
ft := decoder.requestTypes[colIdx]
switch ft.Tp {
case mysql.TypeLonglong, mysql.TypeLong, mysql.TypeInt24, mysql.TypeShort, mysql.TypeTiny, mysql.TypeYear:
if mysql.HasUnsignedFlag(ft.Flag) {
chk.AppendUint64(colIdx, decodeUint(colData))
} else {
chk.AppendInt64(colIdx, decodeInt(colData))
}
case mysql.TypeFloat:
_, fVal, err := codec.DecodeFloat(colData)
if err != nil {
return err
}
chk.AppendFloat32(colIdx, float32(fVal))
case mysql.TypeDouble:
_, fVal, err := codec.DecodeFloat(colData)
if err != nil {
return err
}
chk.AppendFloat64(colIdx, fVal)
case mysql.TypeVarString, mysql.TypeVarchar, mysql.TypeString,
mysql.TypeBlob, mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob:
chk.AppendBytes(colIdx, colData)
case mysql.TypeNewDecimal:
_, dec, _, _, err := codec.DecodeDecimal(colData)
if err != nil {
return err
}
chk.AppendMyDecimal(colIdx, dec)
case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
var t types.Time
t.Type = ft.Tp
t.Fsp = int8(ft.Decimal)
err := t.FromPackedUint(decodeUint(colData))
if err != nil {
return err
}
if ft.Tp == mysql.TypeTimestamp && !t.IsZero() {
err = t.ConvertTimeZone(time.UTC, decoder.loc)
if err != nil {
return err
}
}
chk.AppendTime(colIdx, t)
case mysql.TypeDuration:
var dur types.Duration
dur.Duration = time.Duration(decodeInt(colData))
dur.Fsp = int8(ft.Decimal)
chk.AppendDuration(colIdx, dur)
case mysql.TypeEnum:
// ignore error deliberately, to read empty enum value.
enum, err := types.ParseEnumValue(ft.Elems, decodeUint(colData))
if err != nil {
enum = types.Enum{}
}
chk.AppendEnum(colIdx, enum)
case mysql.TypeSet:
set, err := types.ParseSetValue(ft.Elems, decodeUint(colData))
if err != nil {
return err
}
chk.AppendSet(colIdx, set)
case mysql.TypeBit:
byteSize := (ft.Flen + 7) >> 3
chk.AppendBytes(colIdx, types.NewBinaryLiteralFromUint(decodeUint(colData), byteSize))
case mysql.TypeJSON:
var j json.BinaryJSON
j.TypeCode = colData[0]
j.Value = colData[1:]
chk.AppendJSON(colIdx, j)
default:
return errors.Errorf("unknown type %d", ft.Tp)
}
return nil
}

View File

@ -0,0 +1,368 @@
package rowcodec
import (
"math"
"sort"
"time"
"github.com/juju/errors"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
)
// Encoder is used to encode a row.
type Encoder struct {
row
tempColIDs []int64
values []types.Datum
tempData []byte
}
func (encoder *Encoder) reset() {
encoder.large = false
encoder.numNotNullCols = 0
encoder.numNullCols = 0
encoder.data = encoder.data[:0]
encoder.tempColIDs = encoder.tempColIDs[:0]
encoder.values = encoder.values[:0]
}
func (encoder *Encoder) addColumn(colID int64, d types.Datum) {
if colID > 255 {
encoder.large = true
}
if d.IsNull() {
encoder.numNullCols++
} else {
encoder.numNotNullCols++
}
encoder.tempColIDs = append(encoder.tempColIDs, colID)
encoder.values = append(encoder.values, d)
}
// Encode encodes a row from a datums slice.
func (encoder *Encoder) Encode(colIDs []int64, values []types.Datum, buf []byte) ([]byte, error) {
encoder.reset()
for i, colID := range colIDs {
encoder.addColumn(colID, values[i])
}
return encoder.build(buf[:0])
}
// EncodeFromOldRow encodes a row from an old-format row.
func (encoder *Encoder) EncodeFromOldRow(oldRow, buf []byte) ([]byte, error) {
encoder.reset()
for len(oldRow) > 1 {
var d types.Datum
var err error
oldRow, d, err = codec.DecodeOne(oldRow)
if err != nil {
return nil, err
}
colID := d.GetInt64()
oldRow, d, err = codec.DecodeOne(oldRow)
if err != nil {
return nil, err
}
encoder.addColumn(colID, d)
}
return encoder.build(buf[:0])
}
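// build assembles the binary row from the collected columns. The layout written
// at the end of this function is: codec version, a large-row flag, the not-null
// and null column counts (2 bytes each, little-endian), the value flags, the
// sorted column IDs (1 byte each, or 4 bytes each in large mode), the value
// offsets (2 bytes each, or 4 bytes each in large mode), and finally the value data.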
func (encoder *Encoder) build(buf []byte) ([]byte, error) {
r := &encoder.row
// Separate null and not-null column IDs.
numCols := len(encoder.tempColIDs)
nullIdx := numCols - int(r.numNullCols)
notNullIdx := 0
if r.large {
encoder.initColIDs32()
encoder.initOffsets32()
} else {
encoder.initColIDs()
encoder.initOffsets()
}
for i, colID := range encoder.tempColIDs {
if encoder.values[i].IsNull() {
if r.large {
r.colIDs32[nullIdx] = uint32(colID)
} else {
r.colIDs[nullIdx] = byte(colID)
}
nullIdx++
} else {
if r.large {
r.colIDs32[notNullIdx] = uint32(colID)
} else {
r.colIDs[notNullIdx] = byte(colID)
}
encoder.values[notNullIdx] = encoder.values[i]
notNullIdx++
}
}
if r.large {
largeNotNullSorter := (*largeNotNullSorter)(encoder)
sort.Sort(largeNotNullSorter)
if r.numNullCols > 0 {
largeNullSorter := (*largeNullSorter)(encoder)
sort.Sort(largeNullSorter)
}
} else {
smallNotNullSorter := (*smallNotNullSorter)(encoder)
sort.Sort(smallNotNullSorter)
if r.numNullCols > 0 {
smallNullSorter := (*smallNullSorter)(encoder)
sort.Sort(smallNullSorter)
}
}
encoder.initValFlags()
for i := 0; i < notNullIdx; i++ {
d := encoder.values[i]
switch d.Kind() {
case types.KindInt64:
r.valFlags[i] = IntFlag
r.data = encodeInt(r.data, d.GetInt64())
case types.KindUint64:
r.valFlags[i] = UintFlag
r.data = encodeUint(r.data, d.GetUint64())
case types.KindString, types.KindBytes:
r.valFlags[i] = BytesFlag
r.data = append(r.data, d.GetBytes()...)
default:
var err error
encoder.tempData, err = codec.EncodeValue(defaultStmtCtx, encoder.tempData[:0], d)
if err != nil {
return nil, errors.Trace(err)
}
r.valFlags[i] = encoder.tempData[0]
r.data = append(r.data, encoder.tempData[1:]...)
}
if len(r.data) > math.MaxUint16 && !r.large {
// We need to convert the row to the large format.
encoder.initColIDs32()
for j := 0; j < numCols; j++ {
r.colIDs32[j] = uint32(r.colIDs[j])
}
encoder.initOffsets32()
for j := 0; j <= i; j++ {
r.offsets32[j] = uint32(r.offsets[j])
}
r.large = true
}
if r.large {
r.offsets32[i] = uint32(len(r.data))
} else {
r.offsets[i] = uint16(len(r.data))
}
}
if !r.large {
if len(r.data) >= math.MaxUint16 {
r.large = true
encoder.initColIDs32()
for i, val := range r.colIDs {
r.colIDs32[i] = uint32(val)
}
} else {
encoder.initOffsets()
for i, val := range r.offsets32 {
r.offsets[i] = uint16(val)
}
}
}
buf = append(buf, CodecVer)
flag := byte(0)
if r.large {
flag = 1
}
buf = append(buf, flag)
buf = append(buf, byte(r.numNotNullCols), byte(r.numNotNullCols>>8))
buf = append(buf, byte(r.numNullCols), byte(r.numNullCols>>8))
buf = append(buf, r.valFlags...)
if r.large {
buf = append(buf, u32SliceToBytes(r.colIDs32)...)
buf = append(buf, u32SliceToBytes(r.offsets32)...)
} else {
buf = append(buf, r.colIDs...)
buf = append(buf, u16SliceToBytes(r.offsets)...)
}
buf = append(buf, r.data...)
return buf, nil
}
func (encoder *Encoder) initValFlags() {
if cap(encoder.valFlags) >= int(encoder.numNotNullCols) {
encoder.valFlags = encoder.valFlags[:encoder.numNotNullCols]
} else {
encoder.valFlags = make([]byte, encoder.numNotNullCols)
}
}
func (encoder *Encoder) initColIDs() {
numCols := int(encoder.numNotNullCols + encoder.numNullCols)
if cap(encoder.colIDs) >= numCols {
encoder.colIDs = encoder.colIDs[:numCols]
} else {
encoder.colIDs = make([]byte, numCols)
}
}
func (encoder *Encoder) initColIDs32() {
numCols := int(encoder.numNotNullCols + encoder.numNullCols)
if cap(encoder.colIDs32) >= numCols {
encoder.colIDs32 = encoder.colIDs32[:numCols]
} else {
encoder.colIDs32 = make([]uint32, numCols)
}
}
func (encoder *Encoder) initOffsets() {
if cap(encoder.offsets) >= int(encoder.numNotNullCols) {
encoder.offsets = encoder.offsets[:encoder.numNotNullCols]
} else {
encoder.offsets = make([]uint16, encoder.numNotNullCols)
}
}
func (encoder *Encoder) initOffsets32() {
if cap(encoder.offsets32) >= int(encoder.numNotNullCols) {
encoder.offsets32 = encoder.offsets32[:encoder.numNotNullCols]
} else {
encoder.offsets32 = make([]uint32, encoder.numNotNullCols)
}
}
type largeNotNullSorter Encoder
func (s *largeNotNullSorter) Less(i, j int) bool {
return s.colIDs32[i] < s.colIDs32[j]
}
func (s *largeNotNullSorter) Len() int {
return int(s.numNotNullCols)
}
func (s *largeNotNullSorter) Swap(i, j int) {
s.colIDs32[i], s.colIDs32[j] = s.colIDs32[j], s.colIDs32[i]
s.values[i], s.values[j] = s.values[j], s.values[i]
}
type smallNotNullSorter Encoder
func (s *smallNotNullSorter) Less(i, j int) bool {
return s.colIDs[i] < s.colIDs[j]
}
func (s *smallNotNullSorter) Len() int {
return int(s.numNotNullCols)
}
func (s *smallNotNullSorter) Swap(i, j int) {
s.colIDs[i], s.colIDs[j] = s.colIDs[j], s.colIDs[i]
s.values[i], s.values[j] = s.values[j], s.values[i]
}
type smallNullSorter Encoder
func (s *smallNullSorter) Less(i, j int) bool {
nullCols := s.colIDs[s.numNotNullCols:]
return nullCols[i] < nullCols[j]
}
func (s *smallNullSorter) Len() int {
return int(s.numNullCols)
}
func (s *smallNullSorter) Swap(i, j int) {
nullCols := s.colIDs[s.numNotNullCols:]
nullCols[i], nullCols[j] = nullCols[j], nullCols[i]
}
type largeNullSorter Encoder
func (s *largeNullSorter) Less(i, j int) bool {
nullCols := s.colIDs32[s.numNotNullCols:]
return nullCols[i] < nullCols[j]
}
func (s *largeNullSorter) Len() int {
return int(s.numNullCols)
}
func (s *largeNullSorter) Swap(i, j int) {
nullCols := s.colIDs32[s.numNotNullCols:]
nullCols[i], nullCols[j] = nullCols[j], nullCols[i]
}
var defaultStmtCtx = &stmtctx.StatementContext{
TimeZone: time.Local,
}
const (
// Length of rowkey.
rowKeyLen = 19
// Index of record flag 'r' in rowkey used by master tidb-server.
// The rowkey format is t{8 bytes id}_r{8 bytes handle}
recordPrefixIdx = 10
// Index of record flag 'r' in rowkey with shard byte.
shardedRecordPrefixIdx = 1
)
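// IsRowKeyWithShardByte reports whether key is a 19-byte row key in the sharded
// format, i.e. it starts with 't' and carries the record flag 'r' at shardedRecordPrefixIdx.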
func IsRowKeyWithShardByte(key []byte) bool {
return len(key) == rowKeyLen && key[0] == 't' && key[shardedRecordPrefixIdx] == 'r'
}
func IsRowKey(key []byte) bool {
return len(key) == rowKeyLen && key[0] == 't' && key[recordPrefixIdx] == 'r'
}
// RowToOldRow converts a row to old-format row.
func RowToOldRow(rowData, buf []byte) ([]byte, error) {
if len(rowData) == 0 {
return rowData, nil
}
buf = buf[:0]
var r row
err := r.setRowData(rowData)
if err != nil {
return nil, err
}
if !r.large {
for i, colID := range r.colIDs {
buf = encodeOldOne(&r, buf, i, int64(colID))
}
} else {
for i, colID := range r.colIDs32 {
buf = encodeOldOne(&r, buf, i, int64(colID))
}
}
if len(buf) == 0 {
buf = append(buf, NilFlag)
}
return buf, nil
}
func encodeOldOne(r *row, buf []byte, i int, colID int64) []byte {
buf = append(buf, VarintFlag)
buf = codec.EncodeVarint(buf, colID)
if i < int(r.numNotNullCols) {
val := r.getData(i)
switch r.valFlags[i] {
case BytesFlag:
buf = append(buf, CompactBytesFlag)
buf = codec.EncodeCompactBytes(buf, val)
case IntFlag:
buf = append(buf, VarintFlag)
buf = codec.EncodeVarint(buf, decodeInt(val))
case UintFlag:
buf = append(buf, VaruintFlag)
buf = codec.EncodeUvarint(buf, decodeUint(val))
default:
buf = append(buf, r.valFlags[i])
buf = append(buf, val...)
}
} else {
buf = append(buf, NilFlag)
}
return buf
}

View File

@ -0,0 +1,160 @@
package rowcodec
import (
"testing"
"time"
. "github.com/pingcap/check"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
)
func TestT(t *testing.T) {
TestingT(t)
}
var _ = Suite(&testSuite{})
type testSuite struct{}
func (s *testSuite) TestRowCodec(c *C) {
colIDs := []int64{1, 2, 3}
tps := make([]*types.FieldType, 3)
for i := 0; i < 3; i++ {
tps[i] = types.NewFieldType(mysql.TypeLonglong)
}
sc := new(stmtctx.StatementContext)
oldRow, err := tablecodec.EncodeRow(sc, types.MakeDatums(1, 2, 3), colIDs, nil, nil)
c.Check(err, IsNil)
var rb Encoder
newRow, err := rb.EncodeFromOldRow(oldRow, nil)
c.Check(err, IsNil)
rd, err := NewDecoder(colIDs, 0, tps, make([][]byte, 3), time.Local)
c.Assert(err, IsNil)
chk := chunk.NewChunkWithCapacity(tps, 1)
err = rd.Decode(newRow, -1, chk)
c.Assert(err, IsNil)
row := chk.GetRow(0)
for i := 0; i < 3; i++ {
c.Assert(row.GetInt64(i), Equals, int64(i)+1)
}
}
func (s *testSuite) TestRowCodecIsNull(c *C) {
colIDs := []int64{1, 2}
tps := make([]*types.FieldType, 2)
for i := 0; i < 2; i++ {
tps[i] = types.NewFieldType(mysql.TypeLonglong)
}
var rb Encoder
newRow, err := rb.Encode(colIDs, types.MakeDatums(1, nil), nil)
c.Assert(err, IsNil)
rd, err := NewDecoder(colIDs, 0, tps, make([][]byte, 3), time.Local)
c.Assert(err, IsNil)
defaultVal := make([]byte, 1)
isNull, err := rd.ColumnIsNull(newRow, 1, defaultVal)
c.Assert(err, IsNil)
c.Assert(isNull, IsFalse)
isNull, err = rd.ColumnIsNull(newRow, 1, nil)
c.Assert(err, IsNil)
c.Assert(isNull, IsFalse)
isNull, err = rd.ColumnIsNull(newRow, 2, defaultVal)
c.Assert(err, IsNil)
c.Assert(isNull, IsTrue)
isNull, err = rd.ColumnIsNull(newRow, 3, defaultVal)
c.Assert(err, IsNil)
c.Assert(isNull, IsFalse)
isNull, err = rd.ColumnIsNull(newRow, 3, nil)
c.Assert(err, IsNil)
c.Assert(isNull, IsTrue)
}
func BenchmarkEncode(b *testing.B) {
b.ReportAllocs()
oldRow := types.MakeDatums(1, "abc", 1.1)
var xb Encoder
var buf []byte
colIDs := []int64{1, 2, 3}
var err error
for i := 0; i < b.N; i++ {
buf, err = xb.Encode(colIDs, oldRow, buf)
if err != nil {
b.Fatal(err)
}
}
}
func BenchmarkEncodeFromOldRow(b *testing.B) {
b.ReportAllocs()
oldRow := types.MakeDatums(1, "abc", 1.1)
oldRowData, err := tablecodec.EncodeRow(new(stmtctx.StatementContext), oldRow, []int64{1, 2, 3}, nil, nil)
if err != nil {
b.Fatal(err)
}
var xb Encoder
var buf []byte
for i := 0; i < b.N; i++ {
buf, err = xb.EncodeFromOldRow(oldRowData, buf)
if err != nil {
b.Fatal(err)
}
}
}
func BenchmarkDecode(b *testing.B) {
b.ReportAllocs()
oldRow := types.MakeDatums(1, "abc", 1.1)
colIDs := []int64{-1, 2, 3}
tps := []*types.FieldType{
types.NewFieldType(mysql.TypeLonglong),
types.NewFieldType(mysql.TypeString),
types.NewFieldType(mysql.TypeDouble),
}
var xb Encoder
xRowData, err := xb.Encode(colIDs, oldRow, nil)
if err != nil {
b.Fatal(err)
}
decoder, err := NewDecoder(colIDs, -1, tps, make([][]byte, 3), time.Local)
if err != nil {
b.Fatal(err)
}
chk := chunk.NewChunkWithCapacity(tps, 1)
for i := 0; i < b.N; i++ {
chk.Reset()
err = decoder.Decode(xRowData, 1, chk)
if err != nil {
b.Fatal(err)
}
}
}
func BenchmarkIsNull(b *testing.B) {
b.ReportAllocs()
oldRow := types.MakeDatums(1, "abc", 1.1)
colIDs := []int64{-1, 2, 3}
tps := []*types.FieldType{
types.NewFieldType(mysql.TypeLonglong),
types.NewFieldType(mysql.TypeString),
types.NewFieldType(mysql.TypeDouble),
}
var xb Encoder
xRowData, err := xb.Encode(colIDs, oldRow, nil)
if err != nil {
b.Fatal(err)
}
decoder, err := NewDecoder(colIDs, -1, tps, make([][]byte, 3), time.Local)
if err != nil {
b.Fatal(err)
}
for i := 0; i < b.N; i++ {
_, err = decoder.ColumnIsNull(xRowData, int64(i)%4, nil)
if err != nil {
b.Fatal(err)
}
}
}

91
kv/main.go Normal file
View File

@ -0,0 +1,91 @@
package main
import (
"flag"
"net"
_ "net/http/pprof"
"os"
"os/signal"
"strings"
"syscall"
"time"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/server"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/pingcap-incubator/tinykv/kv/storage/raft_storage"
"github.com/pingcap-incubator/tinykv/kv/storage/standalone_storage"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/tinykvpb"
"google.golang.org/grpc"
"google.golang.org/grpc/keepalive"
)
var (
schedulerAddr = flag.String("scheduler", "", "scheduler address")
storeAddr = flag.String("addr", "", "store address")
)
func main() {
flag.Parse()
conf := config.NewDefaultConfig()
if *schedulerAddr != "" {
conf.SchedulerAddr = *schedulerAddr
}
if *storeAddr != "" {
conf.StoreAddr = *storeAddr
}
log.SetLevelByString(conf.LogLevel)
log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds | log.Lshortfile)
log.Infof("conf %v", conf)
var storage storage.Storage
if conf.Raft {
storage = raft_storage.NewRaftStorage(conf)
} else {
storage = standalone_storage.NewStandAloneStorage(conf)
}
if err := storage.Start(); err != nil {
log.Fatal(err)
}
server := server.NewServer(storage)
var alivePolicy = keepalive.EnforcementPolicy{
MinTime: 2 * time.Second, // If a client pings more than once every 2 seconds, terminate the connection
PermitWithoutStream: true, // Allow pings even when there are no active streams
}
grpcServer := grpc.NewServer(
grpc.KeepaliveEnforcementPolicy(alivePolicy),
grpc.InitialWindowSize(1<<30),
grpc.InitialConnWindowSize(1<<30),
grpc.MaxRecvMsgSize(10*1024*1024),
)
tinykvpb.RegisterTinyKvServer(grpcServer, server)
listenAddr := conf.StoreAddr[strings.IndexByte(conf.StoreAddr, ':'):]
l, err := net.Listen("tcp", listenAddr)
if err != nil {
log.Fatal(err)
}
handleSignal(grpcServer)
err = grpcServer.Serve(l)
if err != nil {
log.Fatal(err)
}
log.Info("Server stopped.")
}
func handleSignal(grpcServer *grpc.Server) {
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh,
syscall.SIGHUP,
syscall.SIGINT,
syscall.SIGTERM,
syscall.SIGQUIT)
go func() {
sig := <-sigCh
log.Infof("Got signal [%s] to exit.", sig)
grpcServer.Stop()
}()
}

View File

@ -0,0 +1,313 @@
package raftstore
import (
"bytes"
"sync"
"time"
"github.com/Connor1996/badger"
"github.com/Connor1996/badger/y"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/runner"
"github.com/pingcap-incubator/tinykv/kv/raftstore/scheduler_client"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/scheduler/pkg/btree"
"github.com/pingcap/errors"
)
var _ btree.Item = &regionItem{}
type regionItem struct {
region *metapb.Region
}
// Less returns true if the region start key is less than the other.
func (r *regionItem) Less(other btree.Item) bool {
left := r.region.GetStartKey()
right := other.(*regionItem).region.GetStartKey()
return bytes.Compare(left, right) < 0
}
type storeMeta struct {
/// regions sorted by start key
regionRanges *btree.BTree
/// region_id -> region
regions map[uint64]*metapb.Region
/// `MsgRequestVote` messages from newly split Regions shouldn't be dropped if there is no
/// such Region in this store now. So the messages are recorded temporarily and will be handled later.
pendingVotes []*rspb.RaftMessage
}
func newStoreMeta() *storeMeta {
return &storeMeta{
regionRanges: btree.New(2),
regions: map[uint64]*metapb.Region{},
}
}
func (m *storeMeta) setRegion(region *metapb.Region, peer *peer) {
m.regions[region.Id] = region
peer.SetRegion(region)
}
// getOverlaps gets the regions which are overlapped with the specified region range.
func (m *storeMeta) getOverlapRegions(region *metapb.Region) []*metapb.Region {
item := &regionItem{region: region}
var result *regionItem
// Find the item that may contain the region's start key.
m.regionRanges.DescendLessOrEqual(item, func(i btree.Item) bool {
result = i.(*regionItem)
return false
})
if result == nil || engine_util.ExceedEndKey(region.GetStartKey(), result.region.GetEndKey()) {
result = item
}
var overlaps []*metapb.Region
m.regionRanges.AscendGreaterOrEqual(result, func(i btree.Item) bool {
over := i.(*regionItem)
if engine_util.ExceedEndKey(over.region.GetStartKey(), region.GetEndKey()) {
return false
}
overlaps = append(overlaps, over.region)
return true
})
return overlaps
}
type GlobalContext struct {
cfg *config.Config
engine *engine_util.Engines
store *metapb.Store
storeMeta *storeMeta
snapMgr *snap.SnapManager
router *router
trans Transport
schedulerTaskSender chan<- worker.Task
regionTaskSender chan<- worker.Task
raftLogGCTaskSender chan<- worker.Task
splitCheckTaskSender chan<- worker.Task
schedulerClient scheduler_client.Client
tickDriverSender chan uint64
}
type Transport interface {
Send(msg *rspb.RaftMessage) error
}
/// loadPeers loads the peers in this store. It scans the kv engine and loads all regions and their peers from it.
/// WARN: This store should not be used before it is initialized.
func (bs *RaftBatchSystem) loadPeers() ([]*peer, error) {
// Scan region meta to get saved regions.
startKey := meta.RegionMetaMinKey
endKey := meta.RegionMetaMaxKey
ctx := bs.ctx
kvEngine := ctx.engine.Kv
storeID := ctx.store.Id
var totalCount, tombStoneCount int
var regionPeers []*peer
t := time.Now()
kvWB := new(engine_util.WriteBatch)
raftWB := new(engine_util.WriteBatch)
err := kvEngine.View(func(txn *badger.Txn) error {
// get all regions from RegionLocalState
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
for it.Seek(startKey); it.Valid(); it.Next() {
item := it.Item()
if bytes.Compare(item.Key(), endKey) >= 0 {
break
}
regionID, suffix, err := meta.DecodeRegionMetaKey(item.Key())
if err != nil {
return err
}
if suffix != meta.RegionStateSuffix {
continue
}
val, err := item.Value()
if err != nil {
return errors.WithStack(err)
}
totalCount++
localState := new(rspb.RegionLocalState)
err = localState.Unmarshal(val)
if err != nil {
return errors.WithStack(err)
}
region := localState.Region
if localState.State == rspb.PeerState_Tombstone {
tombStoneCount++
bs.clearStaleMeta(kvWB, raftWB, localState)
continue
}
peer, err := createPeer(storeID, ctx.cfg, ctx.regionTaskSender, ctx.engine, region)
if err != nil {
return err
}
ctx.storeMeta.regionRanges.ReplaceOrInsert(&regionItem{region: region})
ctx.storeMeta.regions[regionID] = region
// No need to check duplicated here, because we use region id as the key
// in DB.
regionPeers = append(regionPeers, peer)
}
return nil
})
if err != nil {
return nil, err
}
kvWB.MustWriteToDB(ctx.engine.Kv)
raftWB.MustWriteToDB(ctx.engine.Raft)
log.Infof("start store %d, region_count %d, tombstone_count %d, takes %v",
storeID, totalCount, tombStoneCount, time.Since(t))
return regionPeers, nil
}
func (bs *RaftBatchSystem) clearStaleMeta(kvWB, raftWB *engine_util.WriteBatch, originState *rspb.RegionLocalState) {
region := originState.Region
raftState, err := meta.GetRaftLocalState(bs.ctx.engine.Raft, region.Id)
if err != nil {
// it has been cleaned up.
return
}
err = ClearMeta(bs.ctx.engine, kvWB, raftWB, region.Id, raftState.LastIndex)
if err != nil {
panic(err)
}
if err := kvWB.SetMeta(meta.RegionStateKey(region.Id), originState); err != nil {
panic(err)
}
}
type workers struct {
raftLogGCWorker *worker.Worker
schedulerWorker *worker.Worker
splitCheckWorker *worker.Worker
regionWorker *worker.Worker
wg *sync.WaitGroup
}
type RaftBatchSystem struct {
ctx *GlobalContext
storeState *storeState
router *router
workers *workers
tickDriver *tickDriver
closeCh chan struct{}
wg *sync.WaitGroup
}
func (bs *RaftBatchSystem) start(
meta *metapb.Store,
cfg *config.Config,
engines *engine_util.Engines,
trans Transport,
schedulerClient scheduler_client.Client,
snapMgr *snap.SnapManager) error {
y.Assert(bs.workers == nil)
// TODO: we can get cluster meta regularly too later.
if err := cfg.Validate(); err != nil {
return err
}
err := snapMgr.Init()
if err != nil {
return err
}
wg := new(sync.WaitGroup)
bs.workers = &workers{
splitCheckWorker: worker.NewWorker("split-check", wg),
regionWorker: worker.NewWorker("snapshot-worker", wg),
raftLogGCWorker: worker.NewWorker("raft-gc-worker", wg),
schedulerWorker: worker.NewWorker("scheduler-worker", wg),
wg: wg,
}
bs.ctx = &GlobalContext{
cfg: cfg,
engine: engines,
store: meta,
storeMeta: newStoreMeta(),
snapMgr: snapMgr,
router: bs.router,
trans: trans,
schedulerTaskSender: bs.workers.schedulerWorker.Sender(),
regionTaskSender: bs.workers.regionWorker.Sender(),
splitCheckTaskSender: bs.workers.splitCheckWorker.Sender(),
raftLogGCTaskSender: bs.workers.raftLogGCWorker.Sender(),
schedulerClient: schedulerClient,
tickDriverSender: bs.tickDriver.newRegionCh,
}
regionPeers, err := bs.loadPeers()
if err != nil {
return err
}
for _, peer := range regionPeers {
bs.router.register(peer)
}
bs.startWorkers(regionPeers)
return nil
}
func (bs *RaftBatchSystem) startWorkers(peers []*peer) {
ctx := bs.ctx
workers := bs.workers
router := bs.router
bs.wg.Add(2) // raftWorker, storeWorker
rw := newRaftWorker(ctx, router)
go rw.run(bs.closeCh, bs.wg)
sw := newStoreWorker(ctx, bs.storeState)
go sw.run(bs.closeCh, bs.wg)
router.sendStore(message.Msg{Type: message.MsgTypeStoreStart, Data: ctx.store})
for i := 0; i < len(peers); i++ {
regionID := peers[i].regionId
_ = router.send(regionID, message.Msg{RegionID: regionID, Type: message.MsgTypeStart})
}
engines := ctx.engine
cfg := ctx.cfg
workers.splitCheckWorker.Start(runner.NewSplitCheckHandler(engines.Kv, NewRaftstoreRouter(router), cfg))
workers.regionWorker.Start(runner.NewRegionTaskHandler(engines, ctx.snapMgr))
workers.raftLogGCWorker.Start(runner.NewRaftLogGCTaskHandler())
workers.schedulerWorker.Start(runner.NewSchedulerTaskHandler(ctx.store.Id, ctx.schedulerClient, NewRaftstoreRouter(router)))
go bs.tickDriver.run()
}
func (bs *RaftBatchSystem) shutDown() {
close(bs.closeCh)
bs.wg.Wait()
bs.tickDriver.stop()
if bs.workers == nil {
return
}
workers := bs.workers
bs.workers = nil
workers.splitCheckWorker.Stop()
workers.regionWorker.Stop()
workers.raftLogGCWorker.Stop()
workers.schedulerWorker.Stop()
workers.wg.Wait()
}
func CreateRaftBatchSystem(cfg *config.Config) (*RaftstoreRouter, *RaftBatchSystem) {
storeSender, storeState := newStoreState(cfg)
router := newRouter(storeSender)
raftBatchSystem := &RaftBatchSystem{
router: router,
storeState: storeState,
tickDriver: newTickDriver(cfg.RaftBaseTickInterval, router, storeState.ticker),
closeCh: make(chan struct{}),
wg: new(sync.WaitGroup),
}
return NewRaftstoreRouter(router), raftBatchSystem
}

154
kv/raftstore/bootstrap.go Normal file
View File

@ -0,0 +1,154 @@
package raftstore
import (
"bytes"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap/errors"
)
const (
InitEpochVer uint64 = 1
InitEpochConfVer uint64 = 1
)
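// isRangeEmpty reports whether the engine contains no key in [startKey, endKey).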
func isRangeEmpty(engine *badger.DB, startKey, endKey []byte) (bool, error) {
var hasData bool
err := engine.View(func(txn *badger.Txn) error {
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
it.Seek(startKey)
if it.Valid() {
item := it.Item()
if bytes.Compare(item.Key(), endKey) < 0 {
hasData = true
}
}
return nil
})
if err != nil {
return false, errors.WithStack(err)
}
return !hasData, err
}
func BootstrapStore(engines *engine_util.Engines, clusterID, storeID uint64) error {
ident := new(rspb.StoreIdent)
empty, err := isRangeEmpty(engines.Kv, meta.MinKey, meta.MaxKey)
if err != nil {
return err
}
if !empty {
return errors.New("kv store is not empty and ahs alread had data.")
}
empty, err = isRangeEmpty(engines.Raft, meta.MinKey, meta.MaxKey)
if err != nil {
return err
}
if !empty {
return errors.New("raft store is not empty and has already had data.")
}
ident.ClusterId = clusterID
ident.StoreId = storeID
err = engine_util.PutMeta(engines.Kv, meta.StoreIdentKey, ident)
if err != nil {
return err
}
return nil
}
func PrepareBootstrap(engines *engine_util.Engines, storeID, regionID, peerID uint64) (*metapb.Region, error) {
region := &metapb.Region{
Id: regionID,
StartKey: []byte{},
EndKey: []byte{},
RegionEpoch: &metapb.RegionEpoch{
Version: InitEpochVer,
ConfVer: InitEpochConfVer,
},
Peers: []*metapb.Peer{
{
Id: peerID,
StoreId: storeID,
},
},
}
err := PrepareBootstrapCluster(engines, region)
if err != nil {
return nil, err
}
return region, nil
}
func PrepareBootstrapCluster(engines *engine_util.Engines, region *metapb.Region) error {
state := new(rspb.RegionLocalState)
state.Region = region
kvWB := new(engine_util.WriteBatch)
kvWB.SetMeta(meta.PrepareBootstrapKey, state)
kvWB.SetMeta(meta.RegionStateKey(region.Id), state)
writeInitialApplyState(kvWB, region.Id)
err := engines.WriteKV(kvWB)
if err != nil {
return err
}
raftWB := new(engine_util.WriteBatch)
writeInitialRaftState(raftWB, region.Id)
err = engines.WriteRaft(raftWB)
if err != nil {
return err
}
return nil
}
func writeInitialApplyState(kvWB *engine_util.WriteBatch, regionID uint64) {
applyState := &rspb.RaftApplyState{
AppliedIndex: meta.RaftInitLogIndex,
TruncatedState: &rspb.RaftTruncatedState{
Index: meta.RaftInitLogIndex,
Term: meta.RaftInitLogTerm,
},
}
kvWB.SetMeta(meta.ApplyStateKey(regionID), applyState)
}
func writeInitialRaftState(raftWB *engine_util.WriteBatch, regionID uint64) {
raftState := &rspb.RaftLocalState{
HardState: &eraftpb.HardState{
Term: meta.RaftInitLogTerm,
Commit: meta.RaftInitLogIndex,
},
LastIndex: meta.RaftInitLogIndex,
}
raftWB.SetMeta(meta.RaftStateKey(regionID), raftState)
}
func ClearPrepareBootstrap(engines *engine_util.Engines, regionID uint64) error {
err := engines.Raft.Update(func(txn *badger.Txn) error {
return txn.Delete(meta.RaftStateKey(regionID))
})
if err != nil {
return errors.WithStack(err)
}
wb := new(engine_util.WriteBatch)
wb.DeleteMeta(meta.PrepareBootstrapKey)
// should clear raft initial state too.
wb.DeleteMeta(meta.RegionStateKey(regionID))
wb.DeleteMeta(meta.ApplyStateKey(regionID))
err = engines.WriteKV(wb)
if err != nil {
return err
}
return nil
}
func ClearPrepareBootstrapState(engines *engine_util.Engines) error {
err := engines.Kv.Update(func(txn *badger.Txn) error {
return txn.Delete(meta.PrepareBootstrapKey)
})
return errors.WithStack(err)
}

View File

@ -0,0 +1,38 @@
package raftstore
import (
"testing"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/stretchr/testify/require"
)
func TestBootstrapStore(t *testing.T) {
engines := util.NewTestEngines()
defer engines.Destroy()
require.Nil(t, BootstrapStore(engines, 1, 1))
require.NotNil(t, BootstrapStore(engines, 1, 1))
_, err := PrepareBootstrap(engines, 1, 1, 1)
require.Nil(t, err)
region := new(metapb.Region)
require.Nil(t, engine_util.GetMeta(engines.Kv, meta.PrepareBootstrapKey, region))
_, err = meta.GetRegionLocalState(engines.Kv, 1)
require.Nil(t, err)
_, err = meta.GetApplyState(engines.Kv, 1)
require.Nil(t, err)
_, err = meta.GetRaftLocalState(engines.Raft, 1)
require.Nil(t, err)
require.Nil(t, ClearPrepareBootstrapState(engines))
require.Nil(t, ClearPrepareBootstrap(engines, 1))
empty, err := isRangeEmpty(engines.Kv, meta.RegionMetaPrefixKey(1), meta.RegionMetaPrefixKey(2))
require.Nil(t, err)
require.True(t, empty)
empty, err = isRangeEmpty(engines.Kv, meta.RegionRaftPrefixKey(1), meta.RegionRaftPrefixKey(2))
require.Nil(t, err)
require.True(t, empty)
}

62
kv/raftstore/cmd_resp.go Normal file
View File

@ -0,0 +1,62 @@
package raftstore
import (
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/proto/pkg/errorpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
)
func ensureRespHeader(resp *raft_cmdpb.RaftCmdResponse) {
header := resp.GetHeader()
if header == nil {
resp.Header = &raft_cmdpb.RaftResponseHeader{}
}
}
func BindRespTerm(resp *raft_cmdpb.RaftCmdResponse, term uint64) {
if term == 0 {
return
}
ensureRespHeader(resp)
resp.Header.CurrentTerm = term
}
func BindRespError(resp *raft_cmdpb.RaftCmdResponse, err error) {
ensureRespHeader(resp)
resp.Header.Error = util.RaftstoreErrToPbError(err)
}
func ErrResp(err error) *raft_cmdpb.RaftCmdResponse {
resp := &raft_cmdpb.RaftCmdResponse{Header: &raft_cmdpb.RaftResponseHeader{}}
BindRespError(resp, err)
return resp
}
func ErrRespWithTerm(err error, term uint64) *raft_cmdpb.RaftCmdResponse {
resp := ErrResp(err)
BindRespTerm(resp, term)
return resp
}
func ErrRespStaleCommand(term uint64) *raft_cmdpb.RaftCmdResponse {
return ErrRespWithTerm(new(util.ErrStaleCommand), term)
}
func ErrRespRegionNotFound(regionID uint64) *raft_cmdpb.RaftCmdResponse {
return &raft_cmdpb.RaftCmdResponse{
Header: &raft_cmdpb.RaftResponseHeader{
Error: &errorpb.Error{
Message: "region is not found",
RegionNotFound: &errorpb.RegionNotFound{
RegionId: regionID,
},
},
},
}
}
func newCmdRespForReq(req *raft_cmdpb.RaftCmdRequest) *raft_cmdpb.RaftCmdResponse {
return &raft_cmdpb.RaftCmdResponse{
Header: &raft_cmdpb.RaftResponseHeader{},
}
}

View File

@ -0,0 +1,46 @@
package message
import (
"time"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
)
type Callback struct {
Resp *raft_cmdpb.RaftCmdResponse
Txn *badger.Txn // used for GetSnap
done chan struct{}
}
func (cb *Callback) Done(resp *raft_cmdpb.RaftCmdResponse) {
if cb == nil {
return
}
if resp != nil {
cb.Resp = resp
}
cb.done <- struct{}{}
}
func (cb *Callback) WaitResp() *raft_cmdpb.RaftCmdResponse {
select {
case <-cb.done:
return cb.Resp
}
}
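// WaitRespWithTimeout waits until the callback is done or the timeout expires,
// and returns whatever response has been set so far (possibly nil).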
func (cb *Callback) WaitRespWithTimeout(timeout time.Duration) *raft_cmdpb.RaftCmdResponse {
select {
case <-cb.done:
return cb.Resp
case <-time.After(timeout):
return cb.Resp
}
}
func NewCallback() *Callback {
done := make(chan struct{}, 1)
cb := &Callback{done: done}
return cb
}

View File

@ -0,0 +1,70 @@
package message
import (
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
)
type MsgType int64
const (
// just a placeholder
MsgTypeNull MsgType = 0
// message to start the ticker of peer
MsgTypeStart MsgType = 1
// message of base tick to drive the ticker
MsgTypeTick MsgType = 2
// message wraps a raft message that should be forwarded to the Raft module
// the raft message is from a peer on another store
MsgTypeRaftMessage MsgType = 3
// message wraps a raft command that may be a read/write request or an admin request
// the raft command should be proposed to the Raft module
MsgTypeRaftCmd MsgType = 4
// message to trigger a region split
// it first asks the Scheduler to allocate the new split region's ids, then schedules a
// MsgTypeRaftCmd with a split admin command
MsgTypeSplitRegion MsgType = 5
// message to update region approximate size
// it is sent by split checker
MsgTypeRegionApproximateSize MsgType = 6
// message to trigger gc generated snapshots
MsgTypeGcSnap MsgType = 7
// message wraps a raft message to a peer that does not exist on this store.
// It is caused by a region split or an add-peer conf change
MsgTypeStoreRaftMessage MsgType = 101
// message of store base tick to drive the store ticker, including store heartbeat
MsgTypeStoreTick MsgType = 106
// message to start the ticker of store
MsgTypeStoreStart MsgType = 107
)
type Msg struct {
Type MsgType
RegionID uint64
Data interface{}
}
func NewMsg(tp MsgType, data interface{}) Msg {
return Msg{Type: tp, Data: data}
}
func NewPeerMsg(tp MsgType, regionID uint64, data interface{}) Msg {
return Msg{Type: tp, RegionID: regionID, Data: data}
}
type MsgGCSnap struct {
Snaps []snap.SnapKeyWithSending
}
type MsgRaftCmd struct {
Request *raft_cmdpb.RaftCmdRequest
Callback *Callback
}
type MsgSplitRegion struct {
RegionEpoch *metapb.RegionEpoch
SplitKey []byte
Callback *Callback
}

View File

@ -0,0 +1,12 @@
package message
import (
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
)
type RaftRouter interface {
Send(regionID uint64, msg Msg) error
SendRaftMessage(msg *raft_serverpb.RaftMessage) error
SendRaftCommand(req *raft_cmdpb.RaftCmdRequest, cb *Callback) error
}

123
kv/raftstore/meta/keys.go Normal file
View File

@ -0,0 +1,123 @@
package meta
import (
"bytes"
"encoding/binary"
"github.com/pingcap/errors"
)
const (
// local is in (0x01, 0x02)
LocalPrefix byte = 0x01
// We save two types of region data in DB: raft data and other meta data.
// When the store starts, we should iterate over all region meta data to
// construct peers; there is no need to traverse the large raft data, so we
// separate them with different prefixes.
RegionRaftPrefix byte = 0x02
RegionMetaPrefix byte = 0x03
RegionRaftPrefixLen = 11 // REGION_RAFT_PREFIX_KEY + region_id + suffix
RegionRaftLogLen = 19 // REGION_RAFT_PREFIX_KEY + region_id + suffix + index
// Following are the suffix after the local prefix.
// For region id
RaftLogSuffix byte = 0x01
RaftStateSuffix byte = 0x02
ApplyStateSuffix byte = 0x03
// For region meta
RegionStateSuffix byte = 0x01
)
var (
MinKey = []byte{}
MaxKey = []byte{255}
LocalMinKey = []byte{LocalPrefix}
LocalMaxKey = []byte{LocalPrefix + 1}
RegionMetaMinKey = []byte{LocalPrefix, RegionMetaPrefix}
RegionMetaMaxKey = []byte{LocalPrefix, RegionMetaPrefix + 1}
// Following keys are all local keys, so the first byte must be 0x01.
PrepareBootstrapKey = []byte{LocalPrefix, 0x01}
StoreIdentKey = []byte{LocalPrefix, 0x02}
)
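// makeRegionPrefix builds an 11-byte raft key: LocalPrefix, RegionRaftPrefix,
// the big-endian region ID, and a suffix byte. makeRegionKey additionally
// appends a big-endian sub ID (e.g. a log index), giving 19 bytes in total.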
func makeRegionPrefix(regionID uint64, suffix byte) []byte {
key := make([]byte, 11)
key[0] = LocalPrefix
key[1] = RegionRaftPrefix
binary.BigEndian.PutUint64(key[2:], regionID)
key[10] = suffix
return key
}
func makeRegionKey(regionID uint64, suffix byte, subID uint64) []byte {
key := make([]byte, 19)
key[0] = LocalPrefix
key[1] = RegionRaftPrefix
binary.BigEndian.PutUint64(key[2:], regionID)
key[10] = suffix
binary.BigEndian.PutUint64(key[11:], subID)
return key
}
func RegionRaftPrefixKey(regionID uint64) []byte {
key := make([]byte, 10)
key[0] = LocalPrefix
key[1] = RegionRaftPrefix
binary.BigEndian.PutUint64(key[2:], regionID)
return key
}
func RaftLogKey(regionID, index uint64) []byte {
return makeRegionKey(regionID, RaftLogSuffix, index)
}
func RaftStateKey(regionID uint64) []byte {
return makeRegionPrefix(regionID, RaftStateSuffix)
}
func ApplyStateKey(regionID uint64) []byte {
return makeRegionPrefix(regionID, ApplyStateSuffix)
}
func IsRaftStateKey(key []byte) bool {
return len(key) == 11 && key[0] == LocalPrefix && key[1] == RegionRaftPrefix
}
func DecodeRegionMetaKey(key []byte) (uint64, byte, error) {
if len(RegionMetaMinKey)+8+1 != len(key) {
return 0, 0, errors.Errorf("invalid region meta key length for key %v", key)
}
if !bytes.HasPrefix(key, RegionMetaMinKey) {
return 0, 0, errors.Errorf("invalid region meta key prefix for key %v", key)
}
regionID := binary.BigEndian.Uint64(key[len(RegionMetaMinKey):])
return regionID, key[len(key)-1], nil
}
func RegionMetaPrefixKey(regionID uint64) []byte {
key := make([]byte, 10)
key[0] = LocalPrefix
key[1] = RegionMetaPrefix
binary.BigEndian.PutUint64(key[2:], regionID)
return key
}
func RegionStateKey(regionID uint64) []byte {
key := make([]byte, 11)
key[0] = LocalPrefix
key[1] = RegionMetaPrefix
binary.BigEndian.PutUint64(key[2:], regionID)
key[10] = RegionStateSuffix
return key
}
/// RaftLogIndex gets the log index from a raft log key generated by `RaftLogKey`.
func RaftLogIndex(key []byte) (uint64, error) {
if len(key) != RegionRaftLogLen {
return 0, errors.Errorf("key %v is not a valid raft log key", key)
}
return binary.BigEndian.Uint64(key[RegionRaftLogLen-8:]), nil
}

119
kv/raftstore/meta/values.go Normal file
View File

@ -0,0 +1,119 @@
package meta
import (
"github.com/Connor1996/badger"
"github.com/Connor1996/badger/y"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap/errors"
)
func GetRegionLocalState(db *badger.DB, regionId uint64) (*rspb.RegionLocalState, error) {
regionLocalState := new(rspb.RegionLocalState)
if err := engine_util.GetMeta(db, RegionStateKey(regionId), regionLocalState); err != nil {
return regionLocalState, err
}
return regionLocalState, nil
}
func GetRaftLocalState(db *badger.DB, regionId uint64) (*rspb.RaftLocalState, error) {
raftLocalState := new(rspb.RaftLocalState)
if err := engine_util.GetMeta(db, RaftStateKey(regionId), raftLocalState); err != nil {
return raftLocalState, err
}
return raftLocalState, nil
}
func GetApplyState(db *badger.DB, regionId uint64) (*rspb.RaftApplyState, error) {
applyState := new(rspb.RaftApplyState)
if err := engine_util.GetMeta(db, ApplyStateKey(regionId), applyState); err != nil {
return nil, err
}
return applyState, nil
}
func GetRaftEntry(db *badger.DB, regionId, idx uint64) (*eraftpb.Entry, error) {
entry := new(eraftpb.Entry)
if err := engine_util.GetMeta(db, RaftLogKey(regionId, idx), entry); err != nil {
return nil, err
}
return entry, nil
}
const (
// When we create a region peer, we should initialize its log term/index > 0,
// so that we can force the follower peer to sync the snapshot first.
RaftInitLogTerm = 5
RaftInitLogIndex = 5
)
func InitRaftLocalState(raftEngine *badger.DB, region *metapb.Region) (*rspb.RaftLocalState, error) {
raftState, err := GetRaftLocalState(raftEngine, region.Id)
if err != nil && err != badger.ErrKeyNotFound {
return nil, err
}
if err == badger.ErrKeyNotFound {
raftState = new(rspb.RaftLocalState)
raftState.HardState = new(eraftpb.HardState)
if len(region.Peers) > 0 {
// new split region
raftState.LastIndex = RaftInitLogIndex
raftState.HardState.Term = RaftInitLogTerm
raftState.HardState.Commit = RaftInitLogIndex
err = engine_util.PutMeta(raftEngine, RaftStateKey(region.Id), raftState)
if err != nil {
return raftState, err
}
}
}
return raftState, nil
}
func InitApplyState(kvEngine *badger.DB, region *metapb.Region) (*rspb.RaftApplyState, error) {
applyState, err := GetApplyState(kvEngine, region.Id)
if err != nil && err != badger.ErrKeyNotFound {
return nil, err
}
if err == badger.ErrKeyNotFound {
applyState = new(rspb.RaftApplyState)
applyState.TruncatedState = new(rspb.RaftTruncatedState)
if len(region.Peers) > 0 {
applyState.AppliedIndex = RaftInitLogIndex
applyState.TruncatedState.Index = RaftInitLogIndex
applyState.TruncatedState.Term = RaftInitLogTerm
}
err = engine_util.PutMeta(kvEngine, ApplyStateKey(region.Id), applyState)
if err != nil {
return applyState, err
}
}
return applyState, nil
}
func InitLastTerm(raftEngine *badger.DB, region *metapb.Region,
raftState *rspb.RaftLocalState, applyState *rspb.RaftApplyState) (uint64, error) {
lastIdx := raftState.LastIndex
if lastIdx == 0 {
return 0, nil
} else if lastIdx == RaftInitLogIndex {
return RaftInitLogTerm, nil
} else if lastIdx == applyState.TruncatedState.Index {
return applyState.TruncatedState.Term, nil
} else {
y.Assert(lastIdx > RaftInitLogIndex)
}
e, err := GetRaftEntry(raftEngine, region.Id, lastIdx)
if err != nil {
return 0, errors.Errorf("[region %s] entry at %d doesn't exist, may lost data.", region, lastIdx)
}
return e.Term, nil
}
func WriteRegionState(kvWB *engine_util.WriteBatch, region *metapb.Region, state rspb.PeerState) {
regionState := new(rspb.RegionLocalState)
regionState.State = state
regionState.Region = region
kvWB.SetMeta(RegionStateKey(region.Id), regionState)
}

208
kv/raftstore/node.go Normal file
View File

@ -0,0 +1,208 @@
package raftstore
import (
"context"
"time"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/scheduler_client"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/schedulerpb"
"github.com/pingcap/errors"
)
type Node struct {
clusterID uint64
store *metapb.Store
cfg *config.Config
system *RaftBatchSystem
schedulerClient scheduler_client.Client
}
func NewNode(system *RaftBatchSystem, cfg *config.Config, schedulerClient scheduler_client.Client) *Node {
return &Node{
clusterID: schedulerClient.GetClusterID(context.TODO()),
store: &metapb.Store{
Address: cfg.StoreAddr,
},
cfg: cfg,
system: system,
schedulerClient: schedulerClient,
}
}
func (n *Node) Start(ctx context.Context, engines *engine_util.Engines, trans Transport, snapMgr *snap.SnapManager) error {
storeID, err := n.checkStore(engines)
if err != nil {
return err
}
if storeID == util.InvalidID {
storeID, err = n.bootstrapStore(ctx, engines)
}
if err != nil {
return err
}
n.store.Id = storeID
firstRegion, err := n.checkOrPrepareBootstrapCluster(ctx, engines, storeID)
if err != nil {
return err
}
newCluster := firstRegion != nil
if newCluster {
log.Infof("try bootstrap cluster, storeID: %d, region: %s", storeID, firstRegion)
newCluster, err = n.BootstrapCluster(ctx, engines, firstRegion)
if err != nil {
return err
}
}
err = n.schedulerClient.PutStore(ctx, n.store)
if err != nil {
return err
}
if err = n.startNode(engines, trans, snapMgr); err != nil {
return err
}
return nil
}
func (n *Node) checkStore(engines *engine_util.Engines) (uint64, error) {
ident := new(raft_serverpb.StoreIdent)
err := engine_util.GetMeta(engines.Kv, meta.StoreIdentKey, ident)
if err != nil {
if err == badger.ErrKeyNotFound {
return 0, nil
}
return 0, err
}
if ident.ClusterId != n.clusterID {
return 0, errors.Errorf("cluster ID mismatch, local %d != remote %d", ident.ClusterId, n.clusterID)
}
if ident.StoreId == util.InvalidID {
return 0, errors.Errorf("invalid store ident %s", ident)
}
return ident.StoreId, nil
}
func (n *Node) bootstrapStore(ctx context.Context, engines *engine_util.Engines) (uint64, error) {
storeID, err := n.allocID(ctx)
if err != nil {
return 0, err
}
err = BootstrapStore(engines, n.clusterID, storeID)
return storeID, err
}
func (n *Node) allocID(ctx context.Context) (uint64, error) {
return n.schedulerClient.AllocID(ctx)
}
func (n *Node) checkOrPrepareBootstrapCluster(ctx context.Context, engines *engine_util.Engines, storeID uint64) (*metapb.Region, error) {
var state raft_serverpb.RegionLocalState
if err := engine_util.GetMeta(engines.Kv, meta.PrepareBootstrapKey, &state); err == nil {
return state.Region, nil
}
bootstrapped, err := n.checkClusterBootstrapped(ctx)
if err != nil {
return nil, err
}
if bootstrapped {
return nil, nil
}
return n.prepareBootstrapCluster(ctx, engines, storeID)
}
const (
MaxCheckClusterBootstrappedRetryCount = 60
CheckClusterBootstrapRetrySeconds = 3
)
func (n *Node) checkClusterBootstrapped(ctx context.Context) (bool, error) {
for i := 0; i < MaxCheckClusterBootstrappedRetryCount; i++ {
bootstrapped, err := n.schedulerClient.IsBootstrapped(ctx)
if err == nil {
return bootstrapped, nil
}
log.Warnf("check cluster bootstrapped failed, err: %v", err)
time.Sleep(time.Second * CheckClusterBootstrapRetrySeconds)
}
return false, errors.New("check cluster bootstrapped failed")
}
func (n *Node) prepareBootstrapCluster(ctx context.Context, engines *engine_util.Engines, storeID uint64) (*metapb.Region, error) {
regionID, err := n.allocID(ctx)
if err != nil {
return nil, err
}
log.Infof("alloc first region id, regionID: %d, clusterID: %d, storeID: %d", regionID, n.clusterID, storeID)
peerID, err := n.allocID(ctx)
if err != nil {
return nil, err
}
log.Infof("alloc first peer id for first region, peerID: %d, regionID: %d", peerID, regionID)
return PrepareBootstrap(engines, storeID, regionID, peerID)
}
func (n *Node) BootstrapCluster(ctx context.Context, engines *engine_util.Engines, firstRegion *metapb.Region) (newCluster bool, err error) {
regionID := firstRegion.GetId()
for retry := 0; retry < MaxCheckClusterBootstrappedRetryCount; retry++ {
if retry != 0 {
time.Sleep(time.Second)
}
res, err := n.schedulerClient.Bootstrap(ctx, n.store)
if err != nil {
log.Errorf("bootstrap cluster failed, clusterID: %d, err: %v", n.clusterID, err)
continue
}
resErr := res.GetHeader().GetError()
if resErr == nil {
log.Infof("bootstrap cluster ok, clusterID: %d", n.clusterID)
return true, ClearPrepareBootstrapState(engines)
}
if resErr.GetType() == schedulerpb.ErrorType_ALREADY_BOOTSTRAPPED {
region, _, err := n.schedulerClient.GetRegion(ctx, []byte{})
if err != nil {
log.Errorf("get first region failed, err: %v", err)
continue
}
if region.GetId() == regionID {
return false, ClearPrepareBootstrapState(engines)
}
log.Infof("cluster is already bootstrapped, clusterID: %v", n.clusterID)
return false, ClearPrepareBootstrap(engines, regionID)
}
log.Errorf("bootstrap cluster, clusterID: %v, err: %v", n.clusterID, resErr)
}
return false, errors.New("bootstrap cluster failed")
}
func (n *Node) startNode(engines *engine_util.Engines, trans Transport, snapMgr *snap.SnapManager) error {
log.Infof("start raft store node, storeID: %d", n.store.GetId())
return n.system.start(n.store, n.cfg, engines, trans, n.schedulerClient, snapMgr)
}
func (n *Node) stopNode(storeID uint64) {
log.Infof("stop raft store thread, storeID: %d", storeID)
n.system.shutDown()
}
func (n *Node) Stop() {
n.stopNode(n.store.GetId())
}
func (n *Node) GetStoreID() uint64 {
return n.store.GetId()
}

387
kv/raftstore/peer.go Normal file
View File

@ -0,0 +1,387 @@
package raftstore
import (
"fmt"
"time"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/runner"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/raft"
"github.com/pingcap/errors"
)
func NotifyStaleReq(term uint64, cb *message.Callback) {
cb.Done(ErrRespStaleCommand(term))
}
func NotifyReqRegionRemoved(regionId uint64, cb *message.Callback) {
regionNotFound := &util.ErrRegionNotFound{RegionId: regionId}
resp := ErrResp(regionNotFound)
cb.Done(resp)
}
// If we create the peer actively, like bootstrap/split/merge region, we should
// use this function to create the peer. The region must contain the peer info
// for this store.
func createPeer(storeID uint64, cfg *config.Config, sched chan<- worker.Task,
engines *engine_util.Engines, region *metapb.Region) (*peer, error) {
metaPeer := util.FindPeer(region, storeID)
if metaPeer == nil {
return nil, errors.Errorf("find no peer for store %d in region %v", storeID, region)
}
log.Infof("region %v create peer with ID %d", region, metaPeer.Id)
return NewPeer(storeID, cfg, engines, region, sched, metaPeer)
}
// The peer can be created from another node with raft membership changes, and we only
// know the region_id and peer_id when creating this replicated peer; the region info
// will be retrieved later after applying the snapshot.
func replicatePeer(storeID uint64, cfg *config.Config, sched chan<- worker.Task,
engines *engine_util.Engines, regionID uint64, metaPeer *metapb.Peer) (*peer, error) {
// We will remove the tombstone key when applying the snapshot
log.Infof("[region %v] replicates peer with ID %d", regionID, metaPeer.GetId())
region := &metapb.Region{
Id: regionID,
RegionEpoch: &metapb.RegionEpoch{},
}
return NewPeer(storeID, cfg, engines, region, sched, metaPeer)
}
type proposal struct {
// index + term for unique identification
index uint64
term uint64
cb *message.Callback
}
type peer struct {
// The ticker of the peer, used to trigger
// * raft tick
// * raft log gc
// * region heartbeat
// * split check
ticker *ticker
// Instance of the Raft module
RaftGroup *raft.RawNode
// The peer storage for the Raft module
peerStorage *PeerStorage
// Record the meta information of the peer
Meta *metapb.Peer
regionId uint64
// Tag which is useful for printing log
Tag string
// Record the callback of the proposals
// (Used in 2B)
proposals []*proposal
// Index of last scheduled compacted raft log.
// (Used in 2C)
LastCompactedIdx uint64
// Cache the peer information from other stores.
// When sending raft messages to other peers, it's used to get the store id of the target peer
// (Used in 3B conf change)
peerCache map[uint64]*metapb.Peer
// Record the instants of peers being added into the configuration.
// Remove them after they are not pending any more.
// (Used in 3B conf change)
PeersStartPendingTime map[uint64]time.Time
// Mark the peer as stopped, set when peer is destroyed
// (Used in 3B conf change)
stopped bool
// An inaccurate difference in region size since the last reset.
// The split checker is triggered when it exceeds the threshold; this keeps the split checker from scanning the data too often
// (Used in 3B split)
SizeDiffHint uint64
// Approximate size of the region.
// It's updated every time the split checker scans the data
// (Used in 3B split)
ApproximateSize *uint64
}
func NewPeer(storeId uint64, cfg *config.Config, engines *engine_util.Engines, region *metapb.Region, regionSched chan<- worker.Task,
meta *metapb.Peer) (*peer, error) {
if meta.GetId() == util.InvalidID {
return nil, fmt.Errorf("invalid peer id")
}
tag := fmt.Sprintf("[region %v] %v", region.GetId(), meta.GetId())
ps, err := NewPeerStorage(engines, region, regionSched, tag)
if err != nil {
return nil, err
}
appliedIndex := ps.AppliedIndex()
raftCfg := &raft.Config{
ID: meta.GetId(),
ElectionTick: cfg.RaftElectionTimeoutTicks,
HeartbeatTick: cfg.RaftHeartbeatTicks,
Applied: appliedIndex,
Storage: ps,
}
raftGroup, err := raft.NewRawNode(raftCfg)
if err != nil {
return nil, err
}
p := &peer{
Meta: meta,
regionId: region.GetId(),
RaftGroup: raftGroup,
peerStorage: ps,
peerCache: make(map[uint64]*metapb.Peer),
PeersStartPendingTime: make(map[uint64]time.Time),
Tag: tag,
ticker: newTicker(region.GetId(), cfg),
}
// If this region has only one peer and I am the one, campaign directly.
if len(region.GetPeers()) == 1 && region.GetPeers()[0].GetStoreId() == storeId {
err = p.RaftGroup.Campaign()
if err != nil {
return nil, err
}
}
return p, nil
}
func (p *peer) insertPeerCache(peer *metapb.Peer) {
p.peerCache[peer.GetId()] = peer
}
func (p *peer) removePeerCache(peerID uint64) {
delete(p.peerCache, peerID)
}
func (p *peer) getPeerFromCache(peerID uint64) *metapb.Peer {
if peer, ok := p.peerCache[peerID]; ok {
return peer
}
for _, peer := range p.peerStorage.Region().GetPeers() {
if peer.GetId() == peerID {
p.insertPeerCache(peer)
return peer
}
}
return nil
}
func (p *peer) nextProposalIndex() uint64 {
return p.RaftGroup.Raft.RaftLog.LastIndex() + 1
}
/// Tries to destroy itself. Returns a job (if needed) to do more cleaning tasks.
func (p *peer) MaybeDestroy() bool {
if p.stopped {
log.Infof("%v is being destroyed, skip", p.Tag)
return false
}
return true
}
/// Does the real destroy worker.Task which includes:
/// 1. Set the region to tombstone;
/// 2. Clear data;
/// 3. Notify all pending requests.
func (p *peer) Destroy(engine *engine_util.Engines, keepData bool) error {
start := time.Now()
region := p.Region()
log.Infof("%v begin to destroy", p.Tag)
// Set Tombstone state explicitly
kvWB := new(engine_util.WriteBatch)
raftWB := new(engine_util.WriteBatch)
if err := p.peerStorage.clearMeta(kvWB, raftWB); err != nil {
return err
}
meta.WriteRegionState(kvWB, region, rspb.PeerState_Tombstone)
	// write to the kv engine first in case a restart happens between the two writes
if err := kvWB.WriteToDB(engine.Kv); err != nil {
return err
}
if err := raftWB.WriteToDB(engine.Raft); err != nil {
return err
}
if p.peerStorage.isInitialized() && !keepData {
// If we meet panic when deleting data and raft log, the dirty data
// will be cleared by a newer snapshot applying or restart.
p.peerStorage.ClearData()
}
for _, proposal := range p.proposals {
NotifyReqRegionRemoved(region.Id, proposal.cb)
}
p.proposals = nil
log.Infof("%v destroy itself, takes %v", p.Tag, time.Now().Sub(start))
return nil
}
func (p *peer) isInitialized() bool {
return p.peerStorage.isInitialized()
}
func (p *peer) storeID() uint64 {
return p.Meta.StoreId
}
func (p *peer) Region() *metapb.Region {
return p.peerStorage.Region()
}
/// Set the region of a peer.
///
/// This will update the region of the peer, caller must ensure the region
/// has been preserved in a durable device.
func (p *peer) SetRegion(region *metapb.Region) {
p.peerStorage.SetRegion(region)
}
func (p *peer) PeerId() uint64 {
return p.Meta.GetId()
}
func (p *peer) LeaderId() uint64 {
return p.RaftGroup.Raft.Lead
}
func (p *peer) IsLeader() bool {
return p.RaftGroup.Raft.State == raft.StateLeader
}
func (p *peer) Send(trans Transport, msgs []eraftpb.Message) {
for _, msg := range msgs {
err := p.sendRaftMessage(msg, trans)
if err != nil {
log.Debugf("%v send message err: %v", p.Tag, err)
}
}
}
/// Collects all pending peers and updates `PeersStartPendingTime`.
func (p *peer) CollectPendingPeers() []*metapb.Peer {
pendingPeers := make([]*metapb.Peer, 0, len(p.Region().GetPeers()))
truncatedIdx := p.peerStorage.truncatedIndex()
for id, progress := range p.RaftGroup.GetProgress() {
if id == p.Meta.GetId() {
continue
}
if progress.Match < truncatedIdx {
if peer := p.getPeerFromCache(id); peer != nil {
pendingPeers = append(pendingPeers, peer)
if _, ok := p.PeersStartPendingTime[id]; !ok {
now := time.Now()
p.PeersStartPendingTime[id] = now
log.Debugf("%v peer %v start pending at %v", p.Tag, id, now)
}
}
}
}
return pendingPeers
}
func (p *peer) clearPeersStartPendingTime() {
for id := range p.PeersStartPendingTime {
delete(p.PeersStartPendingTime, id)
}
}
/// Returns `true` if any new peer catches up with the leader in replicating logs.
/// And updates `PeersStartPendingTime` if needed.
func (p *peer) AnyNewPeerCatchUp(peerId uint64) bool {
if len(p.PeersStartPendingTime) == 0 {
return false
}
if !p.IsLeader() {
p.clearPeersStartPendingTime()
return false
}
if startPendingTime, ok := p.PeersStartPendingTime[peerId]; ok {
truncatedIdx := p.peerStorage.truncatedIndex()
progress, ok := p.RaftGroup.Raft.Prs[peerId]
if ok {
if progress.Match >= truncatedIdx {
delete(p.PeersStartPendingTime, peerId)
elapsed := time.Since(startPendingTime)
log.Debugf("%v peer %v has caught up logs, elapsed: %v", p.Tag, peerId, elapsed)
return true
}
}
}
return false
}
func (p *peer) MaybeCampaign(parentIsLeader bool) bool {
// The peer campaigned when it was created, no need to do it again.
if len(p.Region().GetPeers()) <= 1 || !parentIsLeader {
return false
}
	// If the last peer was the leader of the region before the split, it's natural for
	// it to become the leader of the new split region.
p.RaftGroup.Campaign()
return true
}
func (p *peer) Term() uint64 {
return p.RaftGroup.Raft.Term
}
func (p *peer) HeartbeatScheduler(ch chan<- worker.Task) {
ch <- &runner.SchedulerRegionHeartbeatTask{
Region: p.Region(),
Peer: p.Meta,
PendingPeers: p.CollectPendingPeers(),
ApproximateSize: p.ApproximateSize,
}
}
func (p *peer) sendRaftMessage(msg eraftpb.Message, trans Transport) error {
sendMsg := new(rspb.RaftMessage)
sendMsg.RegionId = p.regionId
// set current epoch
sendMsg.RegionEpoch = &metapb.RegionEpoch{
ConfVer: p.Region().RegionEpoch.ConfVer,
Version: p.Region().RegionEpoch.Version,
}
fromPeer := *p.Meta
toPeer := p.getPeerFromCache(msg.To)
if toPeer == nil {
return fmt.Errorf("failed to lookup recipient peer %v in region %v", msg.To, p.regionId)
}
log.Debugf("%v, send raft msg %v from %v to %v", p.Tag, msg.MsgType, fromPeer.Id, toPeer.Id)
sendMsg.FromPeer = &fromPeer
sendMsg.ToPeer = toPeer
// There could be two cases:
// 1. Target peer already exists but has not established communication with leader yet
	// 2. Target peer is newly added due to a membership change or region split, but it's not
// created yet
// For both cases the region start key and end key are attached in RequestVote and
// Heartbeat message for the store of that peer to check whether to create a new peer
// when receiving these messages, or just to wait for a pending region split to perform
// later.
if p.peerStorage.isInitialized() && util.IsInitialMsg(&msg) {
sendMsg.StartKey = append([]byte{}, p.Region().StartKey...)
sendMsg.EndKey = append([]byte{}, p.Region().EndKey...)
}
sendMsg.Message = &msg
return trans.Send(sendMsg)
}

View File

@ -0,0 +1,567 @@
package raftstore
import (
"fmt"
"time"
"github.com/Connor1996/badger/y"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/runner"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/scheduler/pkg/btree"
"github.com/pingcap/errors"
)
type PeerTick int
const (
PeerTickRaft PeerTick = 0
PeerTickRaftLogGC PeerTick = 1
PeerTickSplitRegionCheck PeerTick = 2
PeerTickSchedulerHeartbeat PeerTick = 3
)
type peerMsgHandler struct {
*peer
ctx *GlobalContext
}
func newPeerMsgHandler(peer *peer, ctx *GlobalContext) *peerMsgHandler {
return &peerMsgHandler{
peer: peer,
ctx: ctx,
}
}
func (d *peerMsgHandler) HandleRaftReady() {
if d.stopped {
return
}
// Your Code Here (2B).
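	// A possible outline, given as a rough sketch only (this is not the reference solution;
	// the exact sequence below is an assumption based on the APIs available in this package):
	//
	//   if !d.RaftGroup.HasReady() {
	//       return
	//   }
	//   ready := d.RaftGroup.Ready()
	//   // persist the hard state, new log entries and (if any) snapshot
	//   _, _ = d.peerStorage.SaveReadyState(&ready)
	//   // send the raft messages produced by this ready to the other peers
	//   d.Send(d.ctx.trans, ready.Messages)
	//   // apply ready.CommittedEntries to the state machine, answering the matching
	//   // callbacks recorded in d.proposals, then advance the raft group
	//   d.RaftGroup.Advance(ready)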
}
func (d *peerMsgHandler) HandleMsg(msg message.Msg) {
switch msg.Type {
case message.MsgTypeRaftMessage:
raftMsg := msg.Data.(*rspb.RaftMessage)
if err := d.onRaftMsg(raftMsg); err != nil {
log.Errorf("%s handle raft message error %v", d.Tag, err)
}
case message.MsgTypeRaftCmd:
raftCMD := msg.Data.(*message.MsgRaftCmd)
d.proposeRaftCommand(raftCMD.Request, raftCMD.Callback)
case message.MsgTypeTick:
d.onTick()
case message.MsgTypeSplitRegion:
split := msg.Data.(*message.MsgSplitRegion)
log.Infof("%s on split with %v", d.Tag, split.SplitKey)
d.onPrepareSplitRegion(split.RegionEpoch, split.SplitKey, split.Callback)
case message.MsgTypeRegionApproximateSize:
d.onApproximateRegionSize(msg.Data.(uint64))
case message.MsgTypeGcSnap:
gcSnap := msg.Data.(*message.MsgGCSnap)
d.onGCSnap(gcSnap.Snaps)
case message.MsgTypeStart:
d.startTicker()
}
}
func (d *peerMsgHandler) preProposeRaftCommand(req *raft_cmdpb.RaftCmdRequest) error {
// Check store_id, make sure that the msg is dispatched to the right place.
if err := util.CheckStoreID(req, d.storeID()); err != nil {
return err
}
// Check whether the store has the right peer to handle the request.
regionID := d.regionId
leaderID := d.LeaderId()
if !d.IsLeader() {
leader := d.getPeerFromCache(leaderID)
return &util.ErrNotLeader{RegionId: regionID, Leader: leader}
}
// peer_id must be the same as peer's.
if err := util.CheckPeerID(req, d.PeerId()); err != nil {
return err
}
// Check whether the term is stale.
if err := util.CheckTerm(req, d.Term()); err != nil {
return err
}
err := util.CheckRegionEpoch(req, d.Region(), true)
if errEpochNotMatching, ok := err.(*util.ErrEpochNotMatch); ok {
// Attach the region which might be split from the current region. But it doesn't
// matter if the region is not split from the current region. If the region meta
// received by the TiKV driver is newer than the meta cached in the driver, the meta is
// updated.
siblingRegion := d.findSiblingRegion()
if siblingRegion != nil {
errEpochNotMatching.Regions = append(errEpochNotMatching.Regions, siblingRegion)
}
return errEpochNotMatching
}
return err
}
func (d *peerMsgHandler) proposeRaftCommand(msg *raft_cmdpb.RaftCmdRequest, cb *message.Callback) {
err := d.preProposeRaftCommand(msg)
if err != nil {
cb.Done(ErrResp(err))
return
}
// Your Code Here (2B).
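	// A possible outline, given as a rough sketch only (not the reference solution; the
	// bookkeeping shown is one assumed way to associate callbacks with proposed entries):
	//
	//   data, err := msg.Marshal()
	//   if err != nil {
	//       cb.Done(ErrResp(err))
	//       return
	//   }
	//   d.proposals = append(d.proposals, &proposal{
	//       index: d.nextProposalIndex(),
	//       term:  d.Term(),
	//       cb:    cb,
	//   })
	//   d.RaftGroup.Propose(data)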
}
func (d *peerMsgHandler) onTick() {
if d.stopped {
return
}
d.ticker.tickClock()
if d.ticker.isOnTick(PeerTickRaft) {
d.onRaftBaseTick()
}
if d.ticker.isOnTick(PeerTickRaftLogGC) {
d.onRaftGCLogTick()
}
if d.ticker.isOnTick(PeerTickSchedulerHeartbeat) {
d.onSchedulerHeartbeatTick()
}
if d.ticker.isOnTick(PeerTickSplitRegionCheck) {
d.onSplitRegionCheckTick()
}
d.ctx.tickDriverSender <- d.regionId
}
func (d *peerMsgHandler) startTicker() {
d.ticker = newTicker(d.regionId, d.ctx.cfg)
d.ctx.tickDriverSender <- d.regionId
d.ticker.schedule(PeerTickRaft)
d.ticker.schedule(PeerTickRaftLogGC)
d.ticker.schedule(PeerTickSplitRegionCheck)
d.ticker.schedule(PeerTickSchedulerHeartbeat)
}
func (d *peerMsgHandler) onRaftBaseTick() {
d.RaftGroup.Tick()
d.ticker.schedule(PeerTickRaft)
}
func (d *peerMsgHandler) ScheduleCompactLog(firstIndex uint64, truncatedIndex uint64) {
raftLogGCTask := &runner.RaftLogGCTask{
RaftEngine: d.ctx.engine.Raft,
RegionID: d.regionId,
StartIdx: d.LastCompactedIdx,
EndIdx: truncatedIndex + 1,
}
d.LastCompactedIdx = raftLogGCTask.EndIdx
d.ctx.raftLogGCTaskSender <- raftLogGCTask
}
func (d *peerMsgHandler) onRaftMsg(msg *rspb.RaftMessage) error {
log.Debugf("%s handle raft message %s from %d to %d",
d.Tag, msg.GetMessage().GetMsgType(), msg.GetFromPeer().GetId(), msg.GetToPeer().GetId())
if !d.validateRaftMessage(msg) {
return nil
}
if d.stopped {
return nil
}
if msg.GetIsTombstone() {
		// we received a message telling us to remove this peer.
d.handleGCPeerMsg(msg)
return nil
}
if d.checkMessage(msg) {
return nil
}
key, err := d.checkSnapshot(msg)
if err != nil {
return err
}
if key != nil {
		// If the snapshot file is not used again, it's OK to delete it here.
		// If the snapshot file will be reused when receiving, it will fail to
		// pass the check again, so a missing snapshot file will not be a problem.
s, err1 := d.ctx.snapMgr.GetSnapshotForApplying(*key)
if err1 != nil {
return err1
}
d.ctx.snapMgr.DeleteSnapshot(*key, s, false)
return nil
}
d.insertPeerCache(msg.GetFromPeer())
err = d.RaftGroup.Step(*msg.GetMessage())
if err != nil {
return err
}
if d.AnyNewPeerCatchUp(msg.FromPeer.Id) {
d.HeartbeatScheduler(d.ctx.schedulerTaskSender)
}
return nil
}
// Returning false means the message is invalid and can be ignored.
func (d *peerMsgHandler) validateRaftMessage(msg *rspb.RaftMessage) bool {
regionID := msg.GetRegionId()
from := msg.GetFromPeer()
to := msg.GetToPeer()
log.Debugf("[region %d] handle raft message %s from %d to %d", regionID, msg, from.GetId(), to.GetId())
if to.GetStoreId() != d.storeID() {
log.Warnf("[region %d] store not match, to store id %d, mine %d, ignore it",
regionID, to.GetStoreId(), d.storeID())
return false
}
if msg.RegionEpoch == nil {
log.Errorf("[region %d] missing epoch in raft message, ignore it", regionID)
return false
}
return true
}
/// Checks if the message is sent to the correct peer.
///
/// Returning true means that the message can be dropped silently.
func (d *peerMsgHandler) checkMessage(msg *rspb.RaftMessage) bool {
fromEpoch := msg.GetRegionEpoch()
isVoteMsg := util.IsVoteMessage(msg.Message)
fromStoreID := msg.FromPeer.GetStoreId()
	// Let's consider the following cases with three nodes [1, 2, 3] where 1 is the leader:
// a. 1 removes 2, 2 may still send MsgAppendResponse to 1.
// We should ignore this stale message and let 2 remove itself after
// applying the ConfChange log.
// b. 2 is isolated, 1 removes 2. When 2 rejoins the cluster, 2 will
// send stale MsgRequestVote to 1 and 3, at this time, we should tell 2 to gc itself.
// c. 2 is isolated but can communicate with 3. 1 removes 3.
// 2 will send stale MsgRequestVote to 3, 3 should ignore this message.
	// d. 2 is isolated but can communicate with 3. 1 removes 2, then adds 4, removes 3.
// 2 will send stale MsgRequestVote to 3, 3 should tell 2 to gc itself.
// e. 2 is isolated. 1 adds 4, 5, 6, removes 3, 1. Now assume 4 is leader.
// After 2 rejoins the cluster, 2 may send stale MsgRequestVote to 1 and 3,
// 1 and 3 will ignore this message. Later 4 will send messages to 2 and 2 will
// rejoin the raft group again.
// f. 2 is isolated. 1 adds 4, 5, 6, removes 3, 1. Now assume 4 is leader, and 4 removes 2.
// unlike case e, 2 will be stale forever.
// TODO: for case f, if 2 is stale for a long time, 2 will communicate with scheduler and scheduler will
// tell 2 is stale, so 2 can remove itself.
region := d.Region()
if util.IsEpochStale(fromEpoch, region.RegionEpoch) && util.FindPeer(region, fromStoreID) == nil {
// The message is stale and not in current region.
handleStaleMsg(d.ctx.trans, msg, region.RegionEpoch, isVoteMsg)
return true
}
target := msg.GetToPeer()
if target.Id < d.PeerId() {
log.Infof("%s target peer ID %d is less than %d, msg maybe stale", d.Tag, target.Id, d.PeerId())
return true
} else if target.Id > d.PeerId() {
if d.MaybeDestroy() {
log.Infof("%s is stale as received a larger peer %s, destroying", d.Tag, target)
d.destroyPeer()
d.ctx.router.sendStore(message.NewMsg(message.MsgTypeStoreRaftMessage, msg))
}
return true
}
return false
}
func handleStaleMsg(trans Transport, msg *rspb.RaftMessage, curEpoch *metapb.RegionEpoch,
needGC bool) {
regionID := msg.RegionId
fromPeer := msg.FromPeer
toPeer := msg.ToPeer
msgType := msg.Message.GetMsgType()
if !needGC {
log.Infof("[region %d] raft message %s is stale, current %v ignore it",
regionID, msgType, curEpoch)
return
}
gcMsg := &rspb.RaftMessage{
RegionId: regionID,
FromPeer: fromPeer,
ToPeer: toPeer,
RegionEpoch: curEpoch,
IsTombstone: true,
}
if err := trans.Send(gcMsg); err != nil {
log.Errorf("[region %d] send message failed %v", regionID, err)
}
}
func (d *peerMsgHandler) handleGCPeerMsg(msg *rspb.RaftMessage) {
fromEpoch := msg.RegionEpoch
if !util.IsEpochStale(d.Region().RegionEpoch, fromEpoch) {
return
}
if !util.PeerEqual(d.Meta, msg.ToPeer) {
log.Infof("%s receive stale gc msg, ignore", d.Tag)
return
}
log.Infof("%s peer %s receives gc message, trying to remove", d.Tag, msg.ToPeer)
if d.MaybeDestroy() {
d.destroyPeer()
}
}
// Returns nil if the `msg` doesn't contain a snapshot or it contains a snapshot which
// doesn't conflict with any other snapshots or regions. Otherwise a `snap.SnapKey` is returned.
func (d *peerMsgHandler) checkSnapshot(msg *rspb.RaftMessage) (*snap.SnapKey, error) {
if msg.Message.Snapshot == nil {
return nil, nil
}
regionID := msg.RegionId
snapshot := msg.Message.Snapshot
key := snap.SnapKeyFromRegionSnap(regionID, snapshot)
snapData := new(rspb.RaftSnapshotData)
err := snapData.Unmarshal(snapshot.Data)
if err != nil {
return nil, err
}
snapRegion := snapData.Region
peerID := msg.ToPeer.Id
var contains bool
for _, peer := range snapRegion.Peers {
if peer.Id == peerID {
contains = true
break
}
}
if !contains {
log.Infof("%s %s doesn't contains peer %d, skip", d.Tag, snapRegion, peerID)
return &key, nil
}
meta := d.ctx.storeMeta
if !util.RegionEqual(meta.regions[d.regionId], d.Region()) {
if !d.isInitialized() {
log.Infof("%s stale delegate detected, skip", d.Tag)
return &key, nil
} else {
panic(fmt.Sprintf("%s meta corrupted %s != %s", d.Tag, meta.regions[d.regionId], d.Region()))
}
}
existRegions := meta.getOverlapRegions(snapRegion)
for _, existRegion := range existRegions {
if existRegion.GetId() == snapRegion.GetId() {
continue
}
log.Infof("%s region overlapped %s %s", d.Tag, existRegion, snapRegion)
return &key, nil
}
// check if snapshot file exists.
_, err = d.ctx.snapMgr.GetSnapshotForApplying(key)
if err != nil {
return nil, err
}
return nil, nil
}
func (d *peerMsgHandler) destroyPeer() {
log.Infof("%s starts destroy", d.Tag)
regionID := d.regionId
// We can't destroy a peer which is applying snapshot.
meta := d.ctx.storeMeta
isInitialized := d.isInitialized()
if err := d.Destroy(d.ctx.engine, false); err != nil {
		// If we don't panic here, the peer will be recreated on the next restart
		// and then be GCed again. But if some overlapping region is created before
		// restarting, the GC action will delete the overlapping region's data too.
panic(fmt.Sprintf("%s destroy peer %v", d.Tag, err))
}
d.ctx.router.close(regionID)
d.stopped = true
if isInitialized && meta.regionRanges.Delete(&regionItem{region: d.Region()}) == nil {
panic(d.Tag + " meta corruption detected")
}
if _, ok := meta.regions[regionID]; !ok {
panic(d.Tag + " meta corruption detected")
}
delete(meta.regions, regionID)
}
func (d *peerMsgHandler) findSiblingRegion() (result *metapb.Region) {
meta := d.ctx.storeMeta
item := &regionItem{region: d.Region()}
meta.regionRanges.AscendGreaterOrEqual(item, func(i btree.Item) bool {
result = i.(*regionItem).region
return true
})
return
}
func (d *peerMsgHandler) onRaftGCLogTick() {
d.ticker.schedule(PeerTickRaftLogGC)
if !d.IsLeader() {
return
}
appliedIdx := d.peerStorage.AppliedIndex()
firstIdx, _ := d.peerStorage.FirstIndex()
var compactIdx uint64
if appliedIdx > firstIdx && appliedIdx-firstIdx >= d.ctx.cfg.RaftLogGcCountLimit {
compactIdx = appliedIdx
} else {
return
}
y.Assert(compactIdx > 0)
compactIdx -= 1
if compactIdx < firstIdx {
// In case compact_idx == first_idx before subtraction.
return
}
term, err := d.RaftGroup.Raft.RaftLog.Term(compactIdx)
if err != nil {
log.Fatalf("appliedIdx: %d, firstIdx: %d, compactIdx: %d", appliedIdx, firstIdx, compactIdx)
panic(err)
}
// Create a compact log request and notify directly.
regionID := d.regionId
request := newCompactLogRequest(regionID, d.Meta, compactIdx, term)
d.proposeRaftCommand(request, nil)
}
func (d *peerMsgHandler) onSplitRegionCheckTick() {
d.ticker.schedule(PeerTickSplitRegionCheck)
// To avoid frequent scan, we only add new scan tasks if all previous tasks
// have finished.
if len(d.ctx.splitCheckTaskSender) > 0 {
return
}
if !d.IsLeader() {
return
}
if d.ApproximateSize != nil && d.SizeDiffHint < d.ctx.cfg.RegionSplitSize/8 {
return
}
d.ctx.splitCheckTaskSender <- &runner.SplitCheckTask{
Region: d.Region(),
}
d.SizeDiffHint = 0
}
func (d *peerMsgHandler) onPrepareSplitRegion(regionEpoch *metapb.RegionEpoch, splitKey []byte, cb *message.Callback) {
if err := d.validateSplitRegion(regionEpoch, splitKey); err != nil {
cb.Done(ErrResp(err))
return
}
region := d.Region()
d.ctx.schedulerTaskSender <- &runner.SchedulerAskSplitTask{
Region: region,
SplitKey: splitKey,
Peer: d.Meta,
Callback: cb,
}
}
func (d *peerMsgHandler) validateSplitRegion(epoch *metapb.RegionEpoch, splitKey []byte) error {
if len(splitKey) == 0 {
err := errors.Errorf("%s split key should not be empty", d.Tag)
log.Error(err)
return err
}
if !d.IsLeader() {
		// The region's peer on this store is no longer the leader, skip it.
log.Infof("%s not leader, skip", d.Tag)
return &util.ErrNotLeader{
RegionId: d.regionId,
Leader: d.getPeerFromCache(d.LeaderId()),
}
}
region := d.Region()
latestEpoch := region.GetRegionEpoch()
	// This is a little different from `CheckRegionEpoch` in the region split case.
	// Here we only need to check `version` because `conf_ver` will be updated
	// to the latest value of the peer, and then sent to the Scheduler.
if latestEpoch.Version != epoch.Version {
log.Infof("%s epoch changed, retry later, prev_epoch: %s, epoch %s",
d.Tag, latestEpoch, epoch)
return &util.ErrEpochNotMatch{
Message: fmt.Sprintf("%s epoch changed %s != %s, retry later", d.Tag, latestEpoch, epoch),
Regions: []*metapb.Region{region},
}
}
return nil
}
func (d *peerMsgHandler) onApproximateRegionSize(size uint64) {
d.ApproximateSize = &size
}
func (d *peerMsgHandler) onSchedulerHeartbeatTick() {
d.ticker.schedule(PeerTickSchedulerHeartbeat)
if !d.IsLeader() {
return
}
d.HeartbeatScheduler(d.ctx.schedulerTaskSender)
}
func (d *peerMsgHandler) onGCSnap(snaps []snap.SnapKeyWithSending) {
compactedIdx := d.peerStorage.truncatedIndex()
compactedTerm := d.peerStorage.truncatedTerm()
for _, snapKeyWithSending := range snaps {
key := snapKeyWithSending.SnapKey
if snapKeyWithSending.IsSending {
snap, err := d.ctx.snapMgr.GetSnapshotForSending(key)
if err != nil {
log.Errorf("%s failed to load snapshot for %s %v", d.Tag, key, err)
continue
}
if key.Term < compactedTerm || key.Index < compactedIdx {
log.Infof("%s snap file %s has been compacted, delete", d.Tag, key)
d.ctx.snapMgr.DeleteSnapshot(key, snap, false)
} else if fi, err1 := snap.Meta(); err1 == nil {
modTime := fi.ModTime()
if time.Since(modTime) > 4*time.Hour {
log.Infof("%s snap file %s has been expired, delete", d.Tag, key)
d.ctx.snapMgr.DeleteSnapshot(key, snap, false)
}
}
		} else if key.Term <= compactedTerm && key.Index <= compactedIdx {
log.Infof("%s snap file %s has been applied, delete", d.Tag, key)
a, err := d.ctx.snapMgr.GetSnapshotForApplying(key)
if err != nil {
log.Errorf("%s failed to load snapshot for %s %v", d.Tag, key, err)
continue
}
d.ctx.snapMgr.DeleteSnapshot(key, a, false)
}
}
}
func newAdminRequest(regionID uint64, peer *metapb.Peer) *raft_cmdpb.RaftCmdRequest {
return &raft_cmdpb.RaftCmdRequest{
Header: &raft_cmdpb.RaftRequestHeader{
RegionId: regionID,
Peer: peer,
},
}
}
func newCompactLogRequest(regionID uint64, peer *metapb.Peer, compactIndex, compactTerm uint64) *raft_cmdpb.RaftCmdRequest {
req := newAdminRequest(regionID, peer)
req.AdminRequest = &raft_cmdpb.AdminRequest{
CmdType: raft_cmdpb.AdminCmdType_CompactLog,
CompactLog: &raft_cmdpb.CompactLogRequest{
CompactIndex: compactIndex,
CompactTerm: compactTerm,
},
}
return req
}

View File

@ -0,0 +1,449 @@
package raftstore
import (
"bytes"
"fmt"
"time"
"github.com/Connor1996/badger"
"github.com/Connor1996/badger/y"
"github.com/golang/protobuf/proto"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/runner"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/raft"
"github.com/pingcap/errors"
)
type ApplySnapResult struct {
// PrevRegion is the region before snapshot applied
PrevRegion *metapb.Region
Region *metapb.Region
}
var _ raft.Storage = new(PeerStorage)
type PeerStorage struct {
// Tag which is useful for printing log
Tag string
// The underlying storage
Engines *engine_util.Engines
// Cache for the persistent states
region *metapb.Region
raftState rspb.RaftLocalState
applyState rspb.RaftApplyState // (Should be updated too when applying committed entries)
lastTerm uint64
// States for generating snapshot
snapState snap.SnapState
regionSched chan<- worker.Task
snapTriedCnt int
}
func NewPeerStorage(engines *engine_util.Engines, region *metapb.Region, regionSched chan<- worker.Task, tag string) (*PeerStorage, error) {
log.Debugf("%s creating storage for %s", tag, region.String())
raftState, err := meta.InitRaftLocalState(engines.Raft, region)
if err != nil {
return nil, err
}
applyState, err := meta.InitApplyState(engines.Kv, region)
if err != nil {
return nil, err
}
if raftState.LastIndex < applyState.AppliedIndex {
panic(fmt.Sprintf("%s unexpected raft log index: lastIndex %d < appliedIndex %d",
tag, raftState.LastIndex, applyState.AppliedIndex))
}
lastTerm, err := meta.InitLastTerm(engines.Raft, region, raftState, applyState)
if err != nil {
return nil, err
}
return &PeerStorage{
Engines: engines,
region: region,
Tag: tag,
raftState: *raftState,
lastTerm: lastTerm,
regionSched: regionSched,
}, nil
}
func (ps *PeerStorage) InitialState() (eraftpb.HardState, eraftpb.ConfState, error) {
raftState := ps.raftState
if raft.IsEmptyHardState(*raftState.HardState) {
y.AssertTruef(!ps.isInitialized(),
"peer for region %s is initialized but local state %+v has empty hard state",
ps.region, ps.raftState)
return eraftpb.HardState{}, eraftpb.ConfState{}, nil
}
return *raftState.HardState, util.ConfStateFromRegion(ps.region), nil
}
func (ps *PeerStorage) Entries(low, high uint64) ([]eraftpb.Entry, error) {
if err := ps.checkRange(low, high); err != nil || low == high {
return nil, err
}
buf := make([]eraftpb.Entry, 0, high-low)
nextIndex := low
txn := ps.Engines.Raft.NewTransaction(false)
defer txn.Discard()
startKey := meta.RaftLogKey(ps.region.Id, low)
endKey := meta.RaftLogKey(ps.region.Id, high)
iter := txn.NewIterator(badger.DefaultIteratorOptions)
defer iter.Close()
for iter.Seek(startKey); iter.Valid(); iter.Next() {
item := iter.Item()
if bytes.Compare(item.Key(), endKey) >= 0 {
break
}
val, err := item.Value()
if err != nil {
return nil, err
}
var entry eraftpb.Entry
if err = entry.Unmarshal(val); err != nil {
return nil, err
}
		// May hit a gap or the log may have been compacted.
if entry.Index != nextIndex {
break
}
nextIndex++
buf = append(buf, entry)
}
	// If we got the correct number of entries, return them.
if len(buf) == int(high-low) {
return buf, nil
}
	// Otherwise we didn't fetch enough entries.
return nil, raft.ErrUnavailable
}
func (ps *PeerStorage) Term(idx uint64) (uint64, error) {
if idx == ps.truncatedIndex() {
return ps.truncatedTerm(), nil
}
if err := ps.checkRange(idx, idx+1); err != nil {
return 0, err
}
if ps.truncatedTerm() == ps.lastTerm || idx == ps.raftState.LastIndex {
return ps.lastTerm, nil
}
var entry eraftpb.Entry
if err := engine_util.GetMeta(ps.Engines.Raft, meta.RaftLogKey(ps.region.Id, idx), &entry); err != nil {
return 0, err
}
return entry.Term, nil
}
func (ps *PeerStorage) LastIndex() (uint64, error) {
return ps.raftState.LastIndex, nil
}
func (ps *PeerStorage) FirstIndex() (uint64, error) {
return ps.truncatedIndex() + 1, nil
}
func (ps *PeerStorage) Snapshot() (eraftpb.Snapshot, error) {
var snapshot eraftpb.Snapshot
if ps.snapState.StateType == snap.SnapState_Generating {
select {
case s := <-ps.snapState.Receiver:
snapshot = *s
default:
return snapshot, raft.ErrSnapshotTemporarilyUnavailable
}
ps.snapState.StateType = snap.SnapState_Relax
if snapshot.GetMetadata() != nil {
ps.snapTriedCnt = 0
if ps.validateSnap(&snapshot) {
return snapshot, nil
}
} else {
log.Warnf("%s failed to try generating snapshot, times: %d", ps.Tag, ps.snapTriedCnt)
}
}
if ps.snapTriedCnt >= 5 {
err := errors.Errorf("failed to get snapshot after %d times", ps.snapTriedCnt)
ps.snapTriedCnt = 0
return snapshot, err
}
log.Infof("%s requesting snapshot", ps.Tag)
ps.snapTriedCnt++
ps.ScheduleGenerateSnapshot()
return snapshot, raft.ErrSnapshotTemporarilyUnavailable
}
func (ps *PeerStorage) ScheduleGenerateSnapshot() {
ch := make(chan *eraftpb.Snapshot, 1)
ps.snapState = snap.SnapState{
StateType: snap.SnapState_Generating,
Receiver: ch,
}
ps.regionSched <- &runner.RegionTaskGen{
RegionId: ps.region.GetId(),
Notifier: ch,
}
}
func (ps *PeerStorage) isInitialized() bool {
return len(ps.region.Peers) > 0
}
func (ps *PeerStorage) Region() *metapb.Region {
return ps.region
}
func (ps *PeerStorage) SetRegion(region *metapb.Region) {
ps.region = region
}
func (ps *PeerStorage) checkRange(low, high uint64) error {
if low > high {
return errors.Errorf("low %d is greater than high %d", low, high)
} else if low <= ps.truncatedIndex() {
return raft.ErrCompacted
} else if high > ps.raftState.LastIndex+1 {
return errors.Errorf("entries' high %d is out of bound, lastIndex %d",
high, ps.raftState.LastIndex)
}
return nil
}
func (ps *PeerStorage) truncatedIndex() uint64 {
return ps.applyState.TruncatedState.Index
}
func (ps *PeerStorage) truncatedTerm() uint64 {
return ps.applyState.TruncatedState.Term
}
func (ps *PeerStorage) AppliedIndex() uint64 {
return ps.applyState.AppliedIndex
}
func (ps *PeerStorage) validateSnap(snap *eraftpb.Snapshot) bool {
idx := snap.GetMetadata().GetIndex()
if idx < ps.truncatedIndex() {
log.Infof("%s snapshot is stale, generate again, snapIndex: %d, truncatedIndex: %d", ps.Tag, idx, ps.truncatedIndex())
return false
}
var snapData rspb.RaftSnapshotData
if err := proto.UnmarshalMerge(snap.GetData(), &snapData); err != nil {
log.Errorf("%s failed to decode snapshot, it may be corrupted, err: %v", ps.Tag, err)
return false
}
snapEpoch := snapData.GetRegion().GetRegionEpoch()
latestEpoch := ps.region.GetRegionEpoch()
if snapEpoch.GetConfVer() < latestEpoch.GetConfVer() {
log.Infof("%s snapshot epoch is stale, snapEpoch: %s, latestEpoch: %s", ps.Tag, snapEpoch, latestEpoch)
return false
}
return true
}
// Append the given entries to the raft log and delete any previously appended entries
// that will never be committed. The new last index and term are recorded in the in-memory
// raft state; the caller is responsible for persisting the updated raft state afterwards.
func (ps *PeerStorage) Append(entries []eraftpb.Entry, raftWB *engine_util.WriteBatch) error {
log.Debugf("%s append %d entries", ps.Tag, len(entries))
prevLastIndex := ps.raftState.LastIndex
if len(entries) == 0 {
return nil
}
lastEntry := entries[len(entries)-1]
lastIndex := lastEntry.Index
lastTerm := lastEntry.Term
for _, entry := range entries {
err := raftWB.SetMeta(meta.RaftLogKey(ps.region.Id, entry.Index), &entry)
if err != nil {
return err
}
}
	// Delete any previously appended log entries which will never be committed.
for i := lastIndex + 1; i <= prevLastIndex; i++ {
raftWB.DeleteMeta(meta.RaftLogKey(ps.region.Id, i))
}
ps.raftState.LastIndex = lastIndex
ps.lastTerm = lastTerm
return nil
}
func (ps *PeerStorage) clearMeta(kvWB, raftWB *engine_util.WriteBatch) error {
return ClearMeta(ps.Engines, kvWB, raftWB, ps.region.Id, ps.raftState.LastIndex)
}
// Delete all data that is not covered by `newRegion`.
func (ps *PeerStorage) clearExtraData(newRegion *metapb.Region) {
oldStartKey, oldEndKey := ps.region.GetStartKey(), ps.region.GetEndKey()
newStartKey, newEndKey := newRegion.GetStartKey(), newRegion.GetEndKey()
regionId := newRegion.Id
if bytes.Compare(oldStartKey, newStartKey) < 0 {
ps.regionSched <- &runner.RegionTaskDestroy{
RegionId: regionId,
StartKey: oldStartKey,
EndKey: newStartKey,
}
}
if bytes.Compare(newEndKey, oldEndKey) < 0 {
ps.regionSched <- &runner.RegionTaskDestroy{
RegionId: regionId,
StartKey: newEndKey,
EndKey: oldEndKey,
}
}
}
func ClearMeta(engines *engine_util.Engines, kvWB, raftWB *engine_util.WriteBatch, regionID uint64, lastIndex uint64) error {
start := time.Now()
kvWB.DeleteMeta(meta.RegionStateKey(regionID))
kvWB.DeleteMeta(meta.ApplyStateKey(regionID))
firstIndex := lastIndex + 1
beginLogKey := meta.RaftLogKey(regionID, 0)
endLogKey := meta.RaftLogKey(regionID, firstIndex)
err := engines.Raft.View(func(txn *badger.Txn) error {
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
it.Seek(beginLogKey)
if it.Valid() && bytes.Compare(it.Item().Key(), endLogKey) < 0 {
logIdx, err1 := meta.RaftLogIndex(it.Item().Key())
if err1 != nil {
return err1
}
firstIndex = logIdx
}
return nil
})
if err != nil {
return err
}
for i := firstIndex; i <= lastIndex; i++ {
raftWB.DeleteMeta(meta.RaftLogKey(regionID, i))
}
raftWB.DeleteMeta(meta.RaftStateKey(regionID))
log.Infof(
"[region %d] clear peer 1 meta key 1 apply key 1 raft key and %d raft logs, takes %v",
regionID,
lastIndex+1-firstIndex,
time.Since(start),
)
return nil
}
// Apply the given snapshot to the peer storage.
func (ps *PeerStorage) ApplySnapshot(snap *eraftpb.Snapshot, kvWB *engine_util.WriteBatch, raftWB *engine_util.WriteBatch) (*ApplySnapResult, error) {
log.Infof("%v begin to apply snapshot", ps.Tag)
snapData := new(rspb.RaftSnapshotData)
if err := snapData.Unmarshal(snap.Data); err != nil {
return nil, err
}
if snapData.Region.Id != ps.region.Id {
return nil, fmt.Errorf("mismatch region id %v != %v", snapData.Region.Id, ps.region.Id)
}
if ps.isInitialized() {
// we can only delete the old data when the peer is initialized.
if err := ps.clearMeta(kvWB, raftWB); err != nil {
return nil, err
}
}
ps.raftState.LastIndex = snap.Metadata.Index
ps.lastTerm = snap.Metadata.Term
applyRes := &ApplySnapResult{
PrevRegion: ps.region,
Region: snapData.Region,
}
// cleanup data before scheduling apply worker.Task
if ps.isInitialized() {
ps.clearExtraData(snapData.Region)
}
ps.region = snapData.Region
ps.applyState = rspb.RaftApplyState{
AppliedIndex: snap.Metadata.Index,
		// The snapshot only contains log entries whose index > applied index, so
		// here the truncated state's (index, term) comes from the snapshot metadata.
TruncatedState: &rspb.RaftTruncatedState{
Index: snap.Metadata.Index,
Term: snap.Metadata.Term,
},
}
kvWB.SetMeta(meta.ApplyStateKey(ps.region.GetId()), &ps.applyState)
ps.ScheduleApplyingSnapshotAndWait(snapData.Region, snap.Metadata)
meta.WriteRegionState(kvWB, snapData.Region, rspb.PeerState_Normal)
log.Debugf("%v apply snapshot for region %v with state %v ok", ps.Tag, snapData.Region, ps.applyState)
return applyRes, nil
}
/// Save the in-memory states to disk.
/// Do not modify ready in this function; this is a requirement for advancing the ready object properly later.
func (ps *PeerStorage) SaveReadyState(ready *raft.Ready) (*ApplySnapResult, error) {
kvWB, raftWB := new(engine_util.WriteBatch), new(engine_util.WriteBatch)
prevRaftState := ps.raftState
var applyRes *ApplySnapResult = nil
var err error
if !raft.IsEmptySnap(&ready.Snapshot) {
applyRes, err = ps.ApplySnapshot(&ready.Snapshot, kvWB, raftWB)
if err != nil {
return nil, err
}
}
if len(ready.Entries) != 0 {
if err := ps.Append(ready.Entries, raftWB); err != nil {
return nil, err
}
}
if !raft.IsEmptyHardState(ready.HardState) {
ps.raftState.HardState = &ready.HardState
}
if !proto.Equal(&prevRaftState, &ps.raftState) {
raftWB.SetMeta(meta.RaftStateKey(ps.region.GetId()), &ps.raftState)
}
kvWB.MustWriteToDB(ps.Engines.Kv)
raftWB.MustWriteToDB(ps.Engines.Raft)
return applyRes, nil
}
func (ps *PeerStorage) ScheduleApplyingSnapshotAndWait(snapRegion *metapb.Region, snapMeta *eraftpb.SnapshotMetadata) {
ch := make(chan bool)
ps.snapState = snap.SnapState{
StateType: snap.SnapState_Applying,
}
ps.regionSched <- &runner.RegionTaskApply{
RegionId: ps.region.Id,
Notifier: ch,
SnapMeta: snapMeta,
StartKey: snapRegion.GetStartKey(),
EndKey: snapRegion.GetEndKey(),
}
<-ch
}
func (ps *PeerStorage) ClearData() {
ps.regionSched <- &runner.RegionTaskDestroy{
RegionId: ps.region.GetId(),
StartKey: ps.region.GetStartKey(),
EndKey: ps.region.GetEndKey(),
}
}

View File

@ -0,0 +1,240 @@
package raftstore
import (
"bytes"
"testing"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/raft"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func newTestPeerStorage(t *testing.T) *PeerStorage {
engines := util.NewTestEngines()
err := BootstrapStore(engines, 1, 1)
require.Nil(t, err)
region, err := PrepareBootstrap(engines, 1, 1, 1)
require.Nil(t, err)
peerStore, err := NewPeerStorage(engines, region, nil, "")
require.Nil(t, err)
return peerStore
}
func newTestPeerStorageFromEnts(t *testing.T, ents []eraftpb.Entry) *PeerStorage {
peerStore := newTestPeerStorage(t)
kvWB := new(engine_util.WriteBatch)
raftWB := new(engine_util.WriteBatch)
require.Nil(t, peerStore.Append(ents[1:], raftWB))
applyState := peerStore.applyState
applyState.TruncatedState = &rspb.RaftTruncatedState{
Index: ents[0].Index,
Term: ents[0].Term,
}
applyState.AppliedIndex = ents[len(ents)-1].Index
kvWB.SetMeta(meta.ApplyStateKey(peerStore.region.GetId()), &applyState)
require.Nil(t, peerStore.Engines.WriteRaft(raftWB))
peerStore.Engines.WriteKV(kvWB)
return peerStore
}
func cleanUpTestData(peerStore *PeerStorage) {
if err := peerStore.Engines.Destroy(); err != nil {
panic(err)
}
}
func newTestEntry(index, term uint64) eraftpb.Entry {
return eraftpb.Entry{
Index: index,
Term: term,
Data: []byte{0},
}
}
func TestPeerStorageTerm(t *testing.T) {
ents := []eraftpb.Entry{
newTestEntry(3, 3), newTestEntry(4, 4), newTestEntry(5, 5),
}
tests := []struct {
idx uint64
term uint64
err error
}{
{2, 0, raft.ErrCompacted},
{3, 3, nil},
{4, 4, nil},
{5, 5, nil},
}
for _, tt := range tests {
peerStore := newTestPeerStorageFromEnts(t, ents)
term, err := peerStore.Term(tt.idx)
if err != nil {
assert.Equal(t, err, tt.err)
} else {
assert.Equal(t, term, tt.term)
}
cleanUpTestData(peerStore)
}
}
func appendEnts(t *testing.T, peerStore *PeerStorage, ents []eraftpb.Entry) {
raftWB := new(engine_util.WriteBatch)
require.Nil(t, peerStore.Append(ents, raftWB))
raftWB.SetMeta(meta.RaftStateKey(peerStore.region.GetId()), &peerStore.raftState)
require.Nil(t, peerStore.Engines.WriteRaft(raftWB))
}
func getMetaKeyCount(t *testing.T, peerStore *PeerStorage) int {
regionID := peerStore.region.Id
count := 0
metaStart := meta.RegionMetaPrefixKey(regionID)
metaEnd := meta.RegionMetaPrefixKey(regionID + 1)
err := peerStore.Engines.Kv.View(func(txn *badger.Txn) error {
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
for it.Seek(metaStart); it.Valid(); it.Next() {
if bytes.Compare(it.Item().Key(), metaEnd) >= 0 {
break
}
count++
}
return nil
})
require.Nil(t, err)
raftStart := meta.RegionRaftPrefixKey(regionID)
raftEnd := meta.RegionRaftPrefixKey(regionID + 1)
err = peerStore.Engines.Kv.View(func(txn *badger.Txn) error {
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
		for it.Seek(raftStart); it.Valid(); it.Next() {
			if bytes.Compare(it.Item().Key(), raftEnd) >= 0 {
break
}
count++
}
return nil
})
require.Nil(t, err)
err = peerStore.Engines.Raft.View(func(txn *badger.Txn) error {
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
for it.Seek(raftStart); it.Valid(); it.Next() {
if bytes.Compare(it.Item().Key(), raftEnd) >= 0 {
break
}
count++
}
return nil
})
require.Nil(t, err)
return count
}
func TestPeerStorageClearMeta(t *testing.T) {
peerStore := newTestPeerStorageFromEnts(t, []eraftpb.Entry{
newTestEntry(3, 3),
newTestEntry(4, 4),
})
defer cleanUpTestData(peerStore)
appendEnts(t, peerStore, []eraftpb.Entry{
newTestEntry(5, 5),
newTestEntry(6, 6),
})
assert.Equal(t, 6, getMetaKeyCount(t, peerStore))
kvWB := new(engine_util.WriteBatch)
raftWB := new(engine_util.WriteBatch)
require.Nil(t, peerStore.clearMeta(kvWB, raftWB))
require.Nil(t, peerStore.Engines.WriteKV(kvWB))
require.Nil(t, peerStore.Engines.WriteRaft(raftWB))
assert.Equal(t, 0, getMetaKeyCount(t, peerStore))
}
func TestPeerStorageEntries(t *testing.T) {
ents := []eraftpb.Entry{
newTestEntry(3, 3),
newTestEntry(4, 4),
newTestEntry(5, 5),
newTestEntry(6, 6),
}
tests := []struct {
low uint64
high uint64
entries []eraftpb.Entry
err error
}{
{2, 6, nil, raft.ErrCompacted},
{3, 4, nil, raft.ErrCompacted},
{4, 5, []eraftpb.Entry{
newTestEntry(4, 4),
}, nil},
{4, 6, []eraftpb.Entry{
newTestEntry(4, 4),
newTestEntry(5, 5),
}, nil},
}
for i, tt := range tests {
peerStore := newTestPeerStorageFromEnts(t, ents)
defer cleanUpTestData(peerStore)
entries, err := peerStore.Entries(tt.low, tt.high)
if err != nil {
assert.Equal(t, tt.err, err)
} else {
assert.Equal(t, tt.entries, entries, "%d", i)
}
}
}
func TestPeerStorageAppend(t *testing.T) {
ents := []eraftpb.Entry{
newTestEntry(3, 3), newTestEntry(4, 4), newTestEntry(5, 5)}
tests := []struct {
appends []eraftpb.Entry
results []eraftpb.Entry
}{
{
[]eraftpb.Entry{newTestEntry(3, 3), newTestEntry(4, 4), newTestEntry(5, 5)},
[]eraftpb.Entry{newTestEntry(4, 4), newTestEntry(5, 5)},
},
{
[]eraftpb.Entry{newTestEntry(3, 3), newTestEntry(4, 6), newTestEntry(5, 6)},
[]eraftpb.Entry{newTestEntry(4, 6), newTestEntry(5, 6)},
},
{
[]eraftpb.Entry{
newTestEntry(3, 3),
newTestEntry(4, 4),
newTestEntry(5, 5),
newTestEntry(6, 5),
},
[]eraftpb.Entry{newTestEntry(4, 4), newTestEntry(5, 5), newTestEntry(6, 5)},
},
// truncate incoming entries, truncate the existing entries and append
{
[]eraftpb.Entry{newTestEntry(2, 3), newTestEntry(3, 3), newTestEntry(4, 5)},
[]eraftpb.Entry{newTestEntry(4, 5)},
},
// truncate the existing entries and append
{[]eraftpb.Entry{newTestEntry(4, 5)}, []eraftpb.Entry{newTestEntry(4, 5)}},
// direct append
{
[]eraftpb.Entry{newTestEntry(6, 5)},
[]eraftpb.Entry{newTestEntry(4, 4), newTestEntry(5, 5), newTestEntry(6, 5)},
},
}
for _, tt := range tests {
peerStore := newTestPeerStorageFromEnts(t, ents)
defer cleanUpTestData(peerStore)
appendEnts(t, peerStore, tt.appends)
li := peerStore.raftState.LastIndex
		actualEntries, err := peerStore.Entries(4, li+1)
		require.Nil(t, err)
		assert.Equal(t, tt.results, actualEntries)
}
}

View File

@ -0,0 +1,69 @@
package raftstore
import (
"sync"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
)
// raftWorker is responsible for running raft commands and applying raft logs.
type raftWorker struct {
pr *router
raftCh chan message.Msg
ctx *GlobalContext
closeCh <-chan struct{}
}
func newRaftWorker(ctx *GlobalContext, pm *router) *raftWorker {
return &raftWorker{
raftCh: pm.peerSender,
ctx: ctx,
pr: pm,
}
}
// run runs raft commands.
// On each loop, raft messages are batched by the channel buffer.
// After the messages are handled, the raft ready of every involved peer is processed via HandleRaftReady.
func (rw *raftWorker) run(closeCh <-chan struct{}, wg *sync.WaitGroup) {
defer wg.Done()
var msgs []message.Msg
for {
msgs = msgs[:0]
select {
case <-closeCh:
return
case msg := <-rw.raftCh:
msgs = append(msgs, msg)
}
pending := len(rw.raftCh)
for i := 0; i < pending; i++ {
msgs = append(msgs, <-rw.raftCh)
}
peerStateMap := make(map[uint64]*peerState)
for _, msg := range msgs {
peerState := rw.getPeerState(peerStateMap, msg.RegionID)
if peerState == nil {
continue
}
newPeerMsgHandler(peerState.peer, rw.ctx).HandleMsg(msg)
}
for _, peerState := range peerStateMap {
newPeerMsgHandler(peerState.peer, rw.ctx).HandleRaftReady()
}
}
}
func (rw *raftWorker) getPeerState(peersMap map[uint64]*peerState, regionID uint64) *peerState {
peer, ok := peersMap[regionID]
if !ok {
peer = rw.pr.get(regionID)
if peer == nil {
return nil
}
peersMap[regionID] = peer
}
return peer
}

104
kv/raftstore/router.go Normal file
View File

@ -0,0 +1,104 @@
package raftstore
import (
"sync"
"sync/atomic"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap/errors"
)
// peerState contains the peer state needed to run raft commands and apply commands.
type peerState struct {
closed uint32
peer *peer
}
// router routes a message to a peer.
type router struct {
peers sync.Map // regionID -> peerState
peerSender chan message.Msg
storeSender chan<- message.Msg
}
func newRouter(storeSender chan<- message.Msg) *router {
pm := &router{
peerSender: make(chan message.Msg, 40960),
storeSender: storeSender,
}
return pm
}
func (pr *router) get(regionID uint64) *peerState {
v, ok := pr.peers.Load(regionID)
if ok {
return v.(*peerState)
}
return nil
}
func (pr *router) register(peer *peer) {
id := peer.regionId
newPeer := &peerState{
peer: peer,
}
pr.peers.Store(id, newPeer)
}
func (pr *router) close(regionID uint64) {
v, ok := pr.peers.Load(regionID)
if ok {
ps := v.(*peerState)
atomic.StoreUint32(&ps.closed, 1)
pr.peers.Delete(regionID)
}
}
func (pr *router) send(regionID uint64, msg message.Msg) error {
msg.RegionID = regionID
p := pr.get(regionID)
if p == nil || atomic.LoadUint32(&p.closed) == 1 {
return errPeerNotFound
}
pr.peerSender <- msg
return nil
}
func (pr *router) sendStore(msg message.Msg) {
pr.storeSender <- msg
}
var errPeerNotFound = errors.New("peer not found")
type RaftstoreRouter struct {
router *router
}
func NewRaftstoreRouter(router *router) *RaftstoreRouter {
return &RaftstoreRouter{router: router}
}
func (r *RaftstoreRouter) Send(regionID uint64, msg message.Msg) error {
return r.router.send(regionID, msg)
}
func (r *RaftstoreRouter) SendRaftMessage(msg *raft_serverpb.RaftMessage) error {
regionID := msg.RegionId
if r.router.send(regionID, message.NewPeerMsg(message.MsgTypeRaftMessage, regionID, msg)) != nil {
r.router.sendStore(message.NewPeerMsg(message.MsgTypeStoreRaftMessage, regionID, msg))
}
return nil
}
func (r *RaftstoreRouter) SendRaftCommand(req *raft_cmdpb.RaftCmdRequest, cb *message.Callback) error {
cmd := &message.MsgRaftCmd{
Request: req,
Callback: cb,
}
regionID := req.Header.RegionId
return r.router.send(regionID, message.NewPeerMsg(message.MsgTypeRaftCmd, regionID, cmd))
}

View File

@ -0,0 +1,90 @@
package runner
import (
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
)
type RaftLogGCTask struct {
RaftEngine *badger.DB
RegionID uint64
StartIdx uint64
EndIdx uint64
}
type raftLogGcTaskRes uint64
type raftLogGCTaskHandler struct {
taskResCh chan<- raftLogGcTaskRes
}
func NewRaftLogGCTaskHandler() *raftLogGCTaskHandler {
return &raftLogGCTaskHandler{}
}
// gcRaftLog does the GC job and returns the count of logs collected.
func (r *raftLogGCTaskHandler) gcRaftLog(raftDb *badger.DB, regionId, startIdx, endIdx uint64) (uint64, error) {
	// Find the raft log index range that needs to be GCed.
firstIdx := startIdx
if firstIdx == 0 {
firstIdx = endIdx
err := raftDb.View(func(txn *badger.Txn) error {
startKey := meta.RaftLogKey(regionId, 0)
ite := txn.NewIterator(badger.DefaultIteratorOptions)
defer ite.Close()
if ite.Seek(startKey); ite.Valid() {
var err error
if firstIdx, err = meta.RaftLogIndex(ite.Item().Key()); err != nil {
return err
}
}
return nil
})
if err != nil {
return 0, err
}
}
if firstIdx >= endIdx {
log.Infof("no need to gc, [regionId: %d]", regionId)
return 0, nil
}
raftWb := engine_util.WriteBatch{}
for idx := firstIdx; idx < endIdx; idx += 1 {
key := meta.RaftLogKey(regionId, idx)
raftWb.DeleteMeta(key)
}
if raftWb.Len() != 0 {
if err := raftWb.WriteToDB(raftDb); err != nil {
return 0, err
}
}
return endIdx - firstIdx, nil
}
func (r *raftLogGCTaskHandler) reportCollected(collected uint64) {
if r.taskResCh == nil {
return
}
r.taskResCh <- raftLogGcTaskRes(collected)
}
func (r *raftLogGCTaskHandler) Handle(t worker.Task) {
logGcTask, ok := t.(*RaftLogGCTask)
if !ok {
log.Error("unsupported worker.Task: %+v", t)
return
}
log.Debugf("execute gc log. [regionId: %d, endIndex: %d]", logGcTask.RegionID, logGcTask.EndIdx)
collected, err := r.gcRaftLog(logGcTask.RaftEngine, logGcTask.RegionID, logGcTask.StartIdx, logGcTask.EndIdx)
if err != nil {
log.Errorf("failed to gc. [regionId: %d, collected: %d, err: %v]", logGcTask.RegionID, collected, err)
} else {
log.Debugf("collected log entries. [regionId: %d, entryCount: %d]", logGcTask.RegionID, collected)
}
r.reportCollected(collected)
}

View File

@ -0,0 +1,208 @@
package runner
import (
"encoding/hex"
"fmt"
"time"
"github.com/Connor1996/badger"
"github.com/juju/errors"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
)
// There are several tasks for the region worker:
// `RegionTaskGen` which causes the worker to generate a snapshot according to RegionId,
// `RegionTaskApply` which applies a snapshot to the region whose id equals RegionId,
// `RegionTaskDestroy` which cleans up the key range from StartKey to EndKey.
type RegionTaskGen struct {
RegionId uint64 // specify the region which the task is for.
Notifier chan<- *eraftpb.Snapshot // when it finishes snapshot generating, it notifies notifier.
}
type RegionTaskApply struct {
RegionId uint64 // specify the region which the task is for.
Notifier chan<- bool // when it finishes snapshot applying, it notifies notifier.
SnapMeta *eraftpb.SnapshotMetadata // the region meta information of the snapshot
StartKey []byte // `StartKey` and `EndKey` are origin region's range, it's used to clean up certain range of region before applying snapshot.
EndKey []byte
}
type RegionTaskDestroy struct {
RegionId uint64 // specify the region which the task is for.
StartKey []byte // `StartKey` and `EndKey` are used to destroy certain range of region.
EndKey []byte
}
type regionTaskHandler struct {
ctx *snapContext
}
func NewRegionTaskHandler(engines *engine_util.Engines, mgr *snap.SnapManager) *regionTaskHandler {
return &regionTaskHandler{
ctx: &snapContext{
engines: engines,
mgr: mgr,
},
}
}
func (r *regionTaskHandler) Handle(t worker.Task) {
switch t.(type) {
case *RegionTaskGen:
task := t.(*RegionTaskGen)
// It is safe for now to handle generating and applying snapshot concurrently,
		// but it may not be when merge is implemented.
r.ctx.handleGen(task.RegionId, task.Notifier)
case *RegionTaskApply:
task := t.(*RegionTaskApply)
r.ctx.handleApply(task.RegionId, task.Notifier, task.StartKey, task.EndKey, task.SnapMeta)
case *RegionTaskDestroy:
task := t.(*RegionTaskDestroy)
r.ctx.cleanUpRange(task.RegionId, task.StartKey, task.EndKey)
}
}
type snapContext struct {
engines *engine_util.Engines
batchSize uint64
mgr *snap.SnapManager
}
// handleGen handles the task of generating snapshot of the Region.
func (snapCtx *snapContext) handleGen(regionId uint64, notifier chan<- *eraftpb.Snapshot) {
snap, err := doSnapshot(snapCtx.engines, snapCtx.mgr, regionId)
if err != nil {
log.Errorf("failed to generate snapshot!!!, [regionId: %d, err : %v]", regionId, err)
} else {
notifier <- snap
}
}
// applySnap applies snapshot data of the Region.
func (snapCtx *snapContext) applySnap(regionId uint64, startKey, endKey []byte, snapMeta *eraftpb.SnapshotMetadata) error {
log.Infof("begin apply snap data. [regionId: %d]", regionId)
	// clean up the origin data of the region before applying the snapshot
snapCtx.cleanUpRange(regionId, startKey, endKey)
snapKey := snap.SnapKey{RegionID: regionId, Index: snapMeta.Index, Term: snapMeta.Term}
snapCtx.mgr.Register(snapKey, snap.SnapEntryApplying)
defer snapCtx.mgr.Deregister(snapKey, snap.SnapEntryApplying)
snapshot, err := snapCtx.mgr.GetSnapshotForApplying(snapKey)
if err != nil {
		return errors.Errorf("missing snapshot file %s", err)
}
t := time.Now()
applyOptions := snap.NewApplyOptions(snapCtx.engines.Kv, &metapb.Region{
Id: regionId,
StartKey: startKey,
EndKey: endKey,
})
if err := snapshot.Apply(*applyOptions); err != nil {
return err
}
log.Infof("applying new data. [regionId: %d, timeTakes: %v]", regionId, time.Now().Sub(t))
return nil
}
// handleApply tries to apply the snapshot of the specified Region. It calls `applySnap` to do the actual work.
func (snapCtx *snapContext) handleApply(regionId uint64, notifier chan<- bool, startKey, endKey []byte, snapMeta *eraftpb.SnapshotMetadata) {
err := snapCtx.applySnap(regionId, startKey, endKey, snapMeta)
if err != nil {
notifier <- false
log.Fatalf("failed to apply snap!!!. err: %v", err)
}
notifier <- true
}
// cleanUpRange cleans up the data within the range.
func (snapCtx *snapContext) cleanUpRange(regionId uint64, startKey, endKey []byte) {
if err := engine_util.DeleteRange(snapCtx.engines.Kv, startKey, endKey); err != nil {
log.Fatalf("failed to delete data in range, [regionId: %d, startKey: %s, endKey: %s, err: %v]", regionId,
hex.EncodeToString(startKey), hex.EncodeToString(endKey), err)
} else {
log.Infof("succeed in deleting data in range. [regionId: %d, startKey: %s, endKey: %s]", regionId,
hex.EncodeToString(startKey), hex.EncodeToString(endKey))
}
}
func getAppliedIdxTermForSnapshot(raft *badger.DB, kv *badger.Txn, regionId uint64) (uint64, uint64, error) {
applyState := new(rspb.RaftApplyState)
err := engine_util.GetMetaFromTxn(kv, meta.ApplyStateKey(regionId), applyState)
if err != nil {
return 0, 0, err
}
idx := applyState.AppliedIndex
var term uint64
if idx == applyState.TruncatedState.Index {
term = applyState.TruncatedState.Term
} else {
		entry, err := meta.GetRaftEntry(raft, regionId, idx)
		if err != nil {
			return 0, 0, err
		}
		term = entry.GetTerm()
}
return idx, term, nil
}
func doSnapshot(engines *engine_util.Engines, mgr *snap.SnapManager, regionId uint64) (*eraftpb.Snapshot, error) {
log.Debugf("begin to generate a snapshot. [regionId: %d]", regionId)
txn := engines.Kv.NewTransaction(false)
index, term, err := getAppliedIdxTermForSnapshot(engines.Raft, txn, regionId)
if err != nil {
return nil, err
}
key := snap.SnapKey{RegionID: regionId, Index: index, Term: term}
mgr.Register(key, snap.SnapEntryGenerating)
defer mgr.Deregister(key, snap.SnapEntryGenerating)
regionState := new(rspb.RegionLocalState)
err = engine_util.GetMetaFromTxn(txn, meta.RegionStateKey(regionId), regionState)
if err != nil {
panic(err)
}
if regionState.GetState() != rspb.PeerState_Normal {
return nil, errors.Errorf("snap job %d seems stale, skip", regionId)
}
region := regionState.GetRegion()
confState := util.ConfStateFromRegion(region)
snapshot := &eraftpb.Snapshot{
Metadata: &eraftpb.SnapshotMetadata{
Index: key.Index,
Term: key.Term,
ConfState: &confState,
},
}
s, err := mgr.GetSnapshotForBuilding(key)
if err != nil {
return nil, err
}
// Set snapshot data
snapshotData := &rspb.RaftSnapshotData{Region: region}
snapshotStatics := snap.SnapStatistics{}
err = s.Build(txn, region, snapshotData, &snapshotStatics, mgr)
if err != nil {
return nil, err
}
snapshot.Data, err = snapshotData.Marshal()
return snapshot, err
}

View File

@ -0,0 +1,278 @@
package runner
import (
"encoding/binary"
"io"
"io/ioutil"
"testing"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/codec"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// copySnapshot is a helper function to copy snapshot.
// Only used in tests.
func copySnapshot(to, from snap.Snapshot) error {
if !to.Exists() {
_, err := io.Copy(to, from)
if err != nil {
return errors.WithStack(err)
}
return to.Save()
}
return nil
}
func newEnginesWithKVDb(t *testing.T, kv *badger.DB) *engine_util.Engines {
engines := new(engine_util.Engines)
engines.Kv = kv
var err error
engines.RaftPath, err = ioutil.TempDir("", "tinykv_raft")
require.Nil(t, err)
raftOpts := badger.DefaultOptions
raftOpts.Dir = engines.RaftPath
raftOpts.ValueDir = engines.RaftPath
raftOpts.ValueThreshold = 256
engines.Raft, err = badger.Open(raftOpts)
require.Nil(t, err)
return engines
}
func getTestDBForRegions(t *testing.T, path string, regions []uint64) *badger.DB {
db := openDB(t, path)
fillDBData(t, db)
for _, regionID := range regions {
// Put apply state into kv engine.
applyState := &rspb.RaftApplyState{
AppliedIndex: 10,
TruncatedState: &rspb.RaftTruncatedState{
Index: 10,
},
}
require.Nil(t, engine_util.PutMeta(db, meta.ApplyStateKey(regionID), applyState))
// Put region info into kv engine.
region := genTestRegion(regionID, 1, 1)
regionState := new(rspb.RegionLocalState)
regionState.Region = region
require.Nil(t, engine_util.PutMeta(db, meta.RegionStateKey(regionID), regionState))
}
return db
}
func genTestRegion(regionID, storeID, peerID uint64) *metapb.Region {
return &metapb.Region{
Id: regionID,
StartKey: []byte(""),
EndKey: []byte(""),
RegionEpoch: &metapb.RegionEpoch{
Version: 1,
ConfVer: 1,
},
Peers: []*metapb.Peer{
{StoreId: storeID, Id: peerID},
},
}
}
func openDB(t *testing.T, dir string) *badger.DB {
opts := badger.DefaultOptions
opts.Dir = dir
opts.ValueDir = dir
db, err := badger.Open(opts)
require.Nil(t, err)
return db
}
func fillDBData(t *testing.T, db *badger.DB) {
// write some data for multiple cfs.
wb := new(engine_util.WriteBatch)
value := make([]byte, 32)
wb.SetCF(engine_util.CfDefault, []byte("key"), value)
wb.SetCF(engine_util.CfWrite, []byte("key"), value)
wb.SetCF(engine_util.CfLock, []byte("key"), value)
err := wb.WriteToDB(db)
require.Nil(t, err)
}
func TestGcRaftLog(t *testing.T) {
engines := util.NewTestEngines()
defer engines.Destroy()
raftDb := engines.Raft
taskResCh := make(chan raftLogGcTaskRes, 1)
runner := raftLogGCTaskHandler{taskResCh: taskResCh}
// generate raft logs
regionId := uint64(1)
raftWb := new(engine_util.WriteBatch)
for i := uint64(0); i < 100; i++ {
raftWb.SetMeta(meta.RaftLogKey(regionId, i), &eraftpb.Entry{Data: []byte("entry")})
}
raftWb.WriteToDB(raftDb)
type tempHolder struct {
raftLogGcTask worker.Task
expectedCollected uint64
nonExistRange [2]uint64
existRange [2]uint64
}
tbls := []tempHolder{
{
raftLogGcTask: &RaftLogGCTask{
RaftEngine: raftDb,
RegionID: regionId,
StartIdx: uint64(0),
EndIdx: uint64(10),
},
expectedCollected: uint64(10),
nonExistRange: [...]uint64{0, 10},
existRange: [...]uint64{10, 100},
},
{
raftLogGcTask: &RaftLogGCTask{
RaftEngine: raftDb,
RegionID: regionId,
StartIdx: uint64(0),
EndIdx: uint64(50),
},
expectedCollected: uint64(40),
nonExistRange: [...]uint64{0, 50},
existRange: [...]uint64{50, 100},
},
{
raftLogGcTask: &RaftLogGCTask{
RaftEngine: raftDb,
RegionID: regionId,
StartIdx: uint64(50),
EndIdx: uint64(50),
},
expectedCollected: uint64(0),
nonExistRange: [...]uint64{0, 50},
existRange: [...]uint64{50, 100},
},
{
raftLogGcTask: &RaftLogGCTask{
RaftEngine: raftDb,
RegionID: regionId,
StartIdx: uint64(50),
EndIdx: uint64(60),
},
expectedCollected: uint64(10),
nonExistRange: [...]uint64{0, 60},
existRange: [...]uint64{60, 100},
},
}
for _, h := range tbls {
runner.Handle(h.raftLogGcTask)
res := <-taskResCh
assert.Equal(t, h.expectedCollected, uint64(res))
raftLogMustNotExist(t, raftDb, 1, h.nonExistRange[0], h.nonExistRange[1])
raftLogMustExist(t, raftDb, 1, h.existRange[0], h.existRange[1])
}
}
func raftLogMustNotExist(t *testing.T, db *badger.DB, regionId, startIdx, endIdx uint64) {
for i := startIdx; i < endIdx; i++ {
k := meta.RaftLogKey(regionId, i)
db.View(func(txn *badger.Txn) error {
_, err := txn.Get(k)
assert.Equal(t, err, badger.ErrKeyNotFound)
return nil
})
}
}
func raftLogMustExist(t *testing.T, db *badger.DB, regionId, startIdx, endIdx uint64) {
for i := startIdx; i < endIdx; i++ {
k := meta.RaftLogKey(regionId, i)
db.View(func(txn *badger.Txn) error {
item, err := txn.Get(k)
assert.Nil(t, err)
assert.NotNil(t, item)
return nil
})
}
}
func cleanUpTestEngineData(engines *engine_util.Engines) {
if err := engines.Destroy(); err != nil {
panic(err)
}
}
type TaskResRouter struct {
ch chan<- message.Msg
}
func (r *TaskResRouter) Send(regionID uint64, msg message.Msg) error {
r.ch <- msg
return nil
}
func (r *TaskResRouter) SendRaftMessage(msg *rspb.RaftMessage) error {
return nil
}
func (r *TaskResRouter) SendRaftCommand(req *raft_cmdpb.RaftCmdRequest, cb *message.Callback) error {
return nil
}
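// encodeKey builds a versioned key for the test data: the user key is
// memcomparable-encoded via codec.EncodeBytes, then an 8-byte big-endian ^ts
// suffix is appended so that newer versions sort before older ones.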
func encodeKey(key []byte, ts uint64) []byte {
encodedKey := codec.EncodeBytes(key)
newKey := append(encodedKey, make([]byte, 8)...)
binary.BigEndian.PutUint64(newKey[len(newKey)-8:], ^ts)
return newKey
}
func TestSplitCheck(t *testing.T) {
engines := util.NewTestEngines()
defer cleanUpTestEngineData(engines)
db := engines.Kv
taskResCh := make(chan message.Msg, 1)
runner := &splitCheckHandler{
engine: db,
router: &TaskResRouter{ch: taskResCh},
checker: newSizeSplitChecker(100, 50),
}
kvWb := new(engine_util.WriteBatch)
// the length of each kv pair is 21
kvWb.SetCF(engine_util.CfDefault, encodeKey([]byte("k1"), 1), []byte("entry"))
kvWb.SetCF(engine_util.CfDefault, encodeKey([]byte("k1"), 2), []byte("entry"))
kvWb.SetCF(engine_util.CfDefault, encodeKey([]byte("k2"), 1), []byte("entry"))
kvWb.SetCF(engine_util.CfDefault, encodeKey([]byte("k2"), 2), []byte("entry"))
kvWb.SetCF(engine_util.CfDefault, encodeKey([]byte("k3"), 3), []byte("entry"))
kvWb.MustWriteToDB(db)
task := &SplitCheckTask{
Region: &metapb.Region{
StartKey: []byte(""),
EndKey: []byte(""),
},
}
runner.Handle(task)
msg := <-taskResCh
split, ok := msg.Data.(*message.MsgSplitRegion)
assert.True(t, ok)
assert.Equal(t, split.SplitKey, codec.EncodeBytes([]byte("k2")))
}

View File

@ -0,0 +1,152 @@
package runner
import (
"context"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/scheduler_client"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/schedulerpb"
"github.com/shirou/gopsutil/disk"
)
type SchedulerAskSplitTask struct {
Region *metapb.Region
SplitKey []byte
Peer *metapb.Peer
Callback *message.Callback
}
type SchedulerRegionHeartbeatTask struct {
Region *metapb.Region
Peer *metapb.Peer
PendingPeers []*metapb.Peer
ApproximateSize *uint64
}
type SchedulerStoreHeartbeatTask struct {
Stats *schedulerpb.StoreStats
Engine *badger.DB
Path string
}
type SchedulerTaskHandler struct {
storeID uint64
SchedulerClient scheduler_client.Client
router message.RaftRouter
}
func NewSchedulerTaskHandler(storeID uint64, SchedulerClient scheduler_client.Client, router message.RaftRouter) *SchedulerTaskHandler {
return &SchedulerTaskHandler{
storeID: storeID,
SchedulerClient: SchedulerClient,
router: router,
}
}
func (r *SchedulerTaskHandler) Handle(t worker.Task) {
switch task := t.(type) {
case *SchedulerAskSplitTask:
r.onAskSplit(task)
case *SchedulerRegionHeartbeatTask:
r.onHeartbeat(task)
case *SchedulerStoreHeartbeatTask:
r.onStoreHeartbeat(task)
default:
log.Errorf("unsupported worker.Task: %+v", t)
}
}
func (r *SchedulerTaskHandler) Start() {
r.SchedulerClient.SetRegionHeartbeatResponseHandler(r.storeID, r.onRegionHeartbeatResponse)
}
func (r *SchedulerTaskHandler) onRegionHeartbeatResponse(resp *schedulerpb.RegionHeartbeatResponse) {
if changePeer := resp.GetChangePeer(); changePeer != nil {
r.sendAdminRequest(resp.RegionId, resp.RegionEpoch, resp.TargetPeer, &raft_cmdpb.AdminRequest{
CmdType: raft_cmdpb.AdminCmdType_ChangePeer,
ChangePeer: &raft_cmdpb.ChangePeerRequest{
ChangeType: changePeer.ChangeType,
Peer: changePeer.Peer,
},
}, message.NewCallback())
} else if transferLeader := resp.GetTransferLeader(); transferLeader != nil {
r.sendAdminRequest(resp.RegionId, resp.RegionEpoch, resp.TargetPeer, &raft_cmdpb.AdminRequest{
CmdType: raft_cmdpb.AdminCmdType_TransferLeader,
TransferLeader: &raft_cmdpb.TransferLeaderRequest{
Peer: transferLeader.Peer,
},
}, message.NewCallback())
}
}
func (r *SchedulerTaskHandler) onAskSplit(t *SchedulerAskSplitTask) {
resp, err := r.SchedulerClient.AskSplit(context.TODO(), t.Region)
if err != nil {
log.Error(err)
return
}
aq := &raft_cmdpb.AdminRequest{
CmdType: raft_cmdpb.AdminCmdType_Split,
Split: &raft_cmdpb.SplitRequest{
SplitKey: t.SplitKey,
NewRegionId: resp.NewRegionId,
NewPeerIds: resp.NewPeerIds,
},
}
r.sendAdminRequest(t.Region.GetId(), t.Region.GetRegionEpoch(), t.Peer, aq, t.Callback)
}
func (r *SchedulerTaskHandler) onHeartbeat(t *SchedulerRegionHeartbeatTask) {
var size int64
if t.ApproximateSize != nil {
size = int64(*t.ApproximateSize)
}
req := &schedulerpb.RegionHeartbeatRequest{
Region: t.Region,
Leader: t.Peer,
PendingPeers: t.PendingPeers,
ApproximateSize: uint64(size),
}
r.SchedulerClient.RegionHeartbeat(req)
}
func (r *SchedulerTaskHandler) onStoreHeartbeat(t *SchedulerStoreHeartbeatTask) {
diskStat, err := disk.Usage(t.Path)
if err != nil {
log.Error(err)
return
}
capacity := diskStat.Total
lsmSize, vlogSize := t.Engine.Size()
usedSize := t.Stats.UsedSize + uint64(lsmSize) + uint64(vlogSize) // t.Stats.UsedSize contains size of snapshot files.
available := uint64(0)
if capacity > usedSize {
available = capacity - usedSize
}
t.Stats.Capacity = capacity
t.Stats.UsedSize = usedSize
t.Stats.Available = available
r.SchedulerClient.StoreHeartbeat(context.TODO(), t.Stats)
}
func (r *SchedulerTaskHandler) sendAdminRequest(regionID uint64, epoch *metapb.RegionEpoch, peer *metapb.Peer, req *raft_cmdpb.AdminRequest, callback *message.Callback) {
cmd := &raft_cmdpb.RaftCmdRequest{
Header: &raft_cmdpb.RaftRequestHeader{
RegionId: regionID,
Peer: peer,
RegionEpoch: epoch,
},
AdminRequest: req,
}
r.router.SendRaftCommand(cmd, callback)
}
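// Illustrative sketch (not part of the original code): tasks are plain structs
// handed to Handle; for example, a store heartbeat could be dispatched roughly
// like this (storeID, schedulerClient, router, stats, db and kvPath are
// assumed to exist):
//
//	h := NewSchedulerTaskHandler(storeID, schedulerClient, router)
//	h.Start()
//	h.Handle(&SchedulerStoreHeartbeatTask{Stats: stats, Engine: db, Path: kvPath})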

View File

@ -0,0 +1,134 @@
package runner
import (
"encoding/hex"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/codec"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
)
type SplitCheckTask struct {
Region *metapb.Region
}
type splitCheckHandler struct {
engine *badger.DB
router message.RaftRouter
checker *sizeSplitChecker
}
func NewSplitCheckHandler(engine *badger.DB, router message.RaftRouter, conf *config.Config) *splitCheckHandler {
runner := &splitCheckHandler{
engine: engine,
router: router,
checker: newSizeSplitChecker(conf.RegionMaxSize, conf.RegionSplitSize),
}
return runner
}
/// Handle checks a region with the size split checker to produce a split key and, if one is found, sends a split-region message to the router.
func (r *splitCheckHandler) Handle(t worker.Task) {
spCheckTask, ok := t.(*SplitCheckTask)
if !ok {
log.Errorf("unsupported worker.Task: %+v", t)
return
}
region := spCheckTask.Region
regionId := region.Id
log.Debugf("executing split check worker.Task: [regionId: %d, startKey: %s, endKey: %s]", regionId,
hex.EncodeToString(region.StartKey), hex.EncodeToString(region.EndKey))
key := r.splitCheck(regionId, region.StartKey, region.EndKey)
if key != nil {
_, userKey, err := codec.DecodeBytes(key)
if err == nil {
// The key decoded successfully, so it is an encoded (timestamped) key rather than a raw one.
// To keep all versions of the same user key in one region, re-encode only the user key, truncating the timestamp.
key = codec.EncodeBytes(userKey)
}
msg := message.Msg{
Type: message.MsgTypeSplitRegion,
RegionID: regionId,
Data: &message.MsgSplitRegion{
RegionEpoch: region.GetRegionEpoch(),
SplitKey: key,
},
}
err = r.router.Send(regionId, msg)
if err != nil {
log.Warnf("failed to send check result: [regionId: %d, err: %v]", regionId, err)
}
} else {
log.Debugf("no need to send, split key not found: [regionId: %v]", regionId)
}
}
/// splitCheck scans the key range and returns a split key once the accumulated size passes the split threshold, or nil if no split is needed.
func (r *splitCheckHandler) splitCheck(regionID uint64, startKey, endKey []byte) []byte {
txn := r.engine.NewTransaction(false)
defer txn.Discard()
r.checker.reset()
it := engine_util.NewCFIterator(engine_util.CfDefault, txn)
defer it.Close()
for it.Seek(startKey); it.Valid(); it.Next() {
item := it.Item()
key := item.Key()
if engine_util.ExceedEndKey(key, endKey) {
// update region size
r.router.Send(regionID, message.Msg{
Type: message.MsgTypeRegionApproximateSize,
Data: r.checker.currentSize,
})
break
}
if r.checker.onKv(key, item) {
break
}
}
return r.checker.getSplitKey()
}
type sizeSplitChecker struct {
maxSize uint64
splitSize uint64
currentSize uint64
splitKey []byte
}
func newSizeSplitChecker(maxSize, splitSize uint64) *sizeSplitChecker {
return &sizeSplitChecker{
maxSize: maxSize,
splitSize: splitSize,
}
}
func (checker *sizeSplitChecker) reset() {
checker.currentSize = 0
checker.splitKey = nil
}
func (checker *sizeSplitChecker) onKv(key []byte, item engine_util.DBItem) bool {
valueSize := uint64(item.ValueSize())
size := uint64(len(key)) + valueSize
checker.currentSize += size
if checker.currentSize > checker.splitSize && checker.splitKey == nil {
checker.splitKey = util.SafeCopy(key)
}
return checker.currentSize > checker.maxSize
}
func (checker *sizeSplitChecker) getSplitKey() []byte {
// Only keep the split key when the scanned size reached maxSize; otherwise drop it so the last part is not split off too small.
if checker.currentSize < checker.maxSize {
checker.splitKey = nil
}
return checker.splitKey
}
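// Illustrative sketch (not part of the original code): how the two thresholds
// cooperate when driven by a scan, mirroring splitCheck above (it and start
// are assumed to exist):
//
//	checker := newSizeSplitChecker(100, 50) // maxSize, splitSize as in the tests
//	checker.reset()
//	for it.Seek(start); it.Valid(); it.Next() {
//		if checker.onKv(it.Item().Key(), it.Item()) {
//			break // accumulated size exceeded maxSize
//		}
//	}
//	// the first key that pushed the size past splitSize, or nil if the
//	// scanned range never reached maxSize
//	splitKey := checker.getSplitKey()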

View File

@ -0,0 +1,554 @@
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scheduler_client
import (
"context"
"net/url"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/juju/errors"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/schedulerpb"
"google.golang.org/grpc"
)
// Client is a Scheduler client.
// It should not be used after calling Close().
type Client interface {
GetClusterID(ctx context.Context) uint64
AllocID(ctx context.Context) (uint64, error)
Bootstrap(ctx context.Context, store *metapb.Store) (*schedulerpb.BootstrapResponse, error)
IsBootstrapped(ctx context.Context) (bool, error)
PutStore(ctx context.Context, store *metapb.Store) error
GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error)
GetRegion(ctx context.Context, key []byte) (*metapb.Region, *metapb.Peer, error)
GetRegionByID(ctx context.Context, regionID uint64) (*metapb.Region, *metapb.Peer, error)
AskSplit(ctx context.Context, region *metapb.Region) (*schedulerpb.AskSplitResponse, error)
StoreHeartbeat(ctx context.Context, stats *schedulerpb.StoreStats) error
RegionHeartbeat(*schedulerpb.RegionHeartbeatRequest) error
SetRegionHeartbeatResponseHandler(storeID uint64, h func(*schedulerpb.RegionHeartbeatResponse))
Close()
}
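// Illustrative sketch (not part of the original code): typical use of the
// client from a store; the endpoint, store, stats and handler values below are
// assumptions:
//
//	c, err := NewClient([]string{"127.0.0.1:2379"}, "store-1")
//	if err != nil {
//		// handle error
//	}
//	defer c.Close()
//	if ok, _ := c.IsBootstrapped(context.TODO()); !ok {
//		_, _ = c.Bootstrap(context.TODO(), store)
//	}
//	c.SetRegionHeartbeatResponseHandler(storeID, onHeartbeatResp)
//	_ = c.StoreHeartbeat(context.TODO(), stats)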
const (
schedulerTimeout = time.Second
retryInterval = time.Second
maxInitClusterRetries = 100
maxRetryCount = 10
)
var (
// errFailInitClusterID is returned when the cluster ID cannot be loaded from any of the supplied Scheduler addresses.
errFailInitClusterID = errors.New("[scheduler] failed to get cluster id")
)
type client struct {
urls []string
clusterID uint64
tag string
connMu struct {
sync.RWMutex
clientConns map[string]*grpc.ClientConn
leader string
}
checkLeaderCh chan struct{}
receiveRegionHeartbeatCh chan *schedulerpb.RegionHeartbeatResponse
regionCh chan *schedulerpb.RegionHeartbeatRequest
pendingRequest *schedulerpb.RegionHeartbeatRequest
wg sync.WaitGroup
ctx context.Context
cancel context.CancelFunc
heartbeatHandler atomic.Value
}
// NewClient creates a Scheduler client.
func NewClient(pdAddrs []string, tag string) (Client, error) {
ctx, cancel := context.WithCancel(context.Background())
urls := make([]string, 0, len(pdAddrs))
for _, addr := range pdAddrs {
if strings.Contains(addr, "://") {
urls = append(urls, addr)
} else {
urls = append(urls, "http://"+addr)
}
}
log.Infof("[%s][scheduler] create scheduler client with endpoints %v", tag, urls)
c := &client{
urls: urls,
receiveRegionHeartbeatCh: make(chan *schedulerpb.RegionHeartbeatResponse, 1),
checkLeaderCh: make(chan struct{}, 1),
ctx: ctx,
cancel: cancel,
tag: tag,
regionCh: make(chan *schedulerpb.RegionHeartbeatRequest, 64),
}
c.connMu.clientConns = make(map[string]*grpc.ClientConn)
var (
err error
members *schedulerpb.GetMembersResponse
)
for i := 0; i < maxRetryCount; i++ {
if members, err = c.updateLeader(); err == nil {
break
}
time.Sleep(retryInterval)
}
if err != nil {
return nil, err
}
c.clusterID = members.GetHeader().GetClusterId()
log.Infof("[%s][scheduler] init cluster id %v", tag, c.clusterID)
c.wg.Add(2)
go c.checkLeaderLoop()
go c.heartbeatStreamLoop()
return c, nil
}
func (c *client) schedulerUpdateLeader() {
select {
case c.checkLeaderCh <- struct{}{}:
default:
}
}
func (c *client) checkLeaderLoop() {
defer c.wg.Done()
ctx, cancel := context.WithCancel(c.ctx)
defer cancel()
ticker := time.NewTicker(time.Minute)
defer ticker.Stop()
for {
select {
case <-c.checkLeaderCh:
case <-ticker.C:
case <-ctx.Done():
return
}
if _, err := c.updateLeader(); err != nil {
log.Errorf("[scheduler] failed updateLeader, err: %s", err)
}
}
}
func (c *client) updateLeader() (*schedulerpb.GetMembersResponse, error) {
for _, u := range c.urls {
ctx, cancel := context.WithTimeout(c.ctx, schedulerTimeout)
members, err := c.getMembers(ctx, u)
cancel()
if err != nil || members.GetLeader() == nil || len(members.GetLeader().GetClientUrls()) == 0 {
select {
case <-c.ctx.Done():
return nil, err
default:
continue
}
}
c.updateURLs(members.GetMembers(), members.GetLeader())
return members, c.switchLeader(members.GetLeader().GetClientUrls())
}
return nil, errors.Errorf("failed to get leader from %v", c.urls)
}
func (c *client) updateURLs(members []*schedulerpb.Member, leader *schedulerpb.Member) {
urls := make([]string, 0, len(members))
for _, m := range members {
if m.GetMemberId() == leader.GetMemberId() {
continue
}
urls = append(urls, m.GetClientUrls()...)
}
c.urls = append(urls, leader.GetClientUrls()...)
}
func (c *client) switchLeader(addrs []string) error {
addr := addrs[0]
c.connMu.RLock()
oldLeader := c.connMu.leader
c.connMu.RUnlock()
if addr == oldLeader {
return nil
}
log.Infof("[scheduler] switch leader, new-leader: %s, old-leader: %s", addr, oldLeader)
if _, err := c.getOrCreateConn(addr); err != nil {
return err
}
c.connMu.Lock()
c.connMu.leader = addr
c.connMu.Unlock()
return nil
}
func (c *client) getMembers(ctx context.Context, url string) (*schedulerpb.GetMembersResponse, error) {
cc, err := c.getOrCreateConn(url)
if err != nil {
return nil, err
}
return schedulerpb.NewSchedulerClient(cc).GetMembers(ctx, new(schedulerpb.GetMembersRequest))
}
func (c *client) getOrCreateConn(addr string) (*grpc.ClientConn, error) {
c.connMu.RLock()
conn, ok := c.connMu.clientConns[addr]
c.connMu.RUnlock()
if ok {
return conn, nil
}
u, err := url.Parse(addr)
if err != nil {
return nil, err
}
cc, err := grpc.Dial(u.Host, grpc.WithInsecure())
if err != nil {
return nil, err
}
c.connMu.Lock()
defer c.connMu.Unlock()
if old, ok := c.connMu.clientConns[addr]; ok {
cc.Close()
return old, nil
}
c.connMu.clientConns[addr] = cc
return cc, nil
}
func (c *client) leaderClient() schedulerpb.SchedulerClient {
c.connMu.RLock()
defer c.connMu.RUnlock()
return schedulerpb.NewSchedulerClient(c.connMu.clientConns[c.connMu.leader])
}
func (c *client) doRequest(ctx context.Context, f func(context.Context, schedulerpb.SchedulerClient) error) error {
var err error
for i := 0; i < maxRetryCount; i++ {
ctx1, cancel := context.WithTimeout(ctx, schedulerTimeout)
err = f(ctx1, c.leaderClient())
cancel()
if err == nil {
return nil
}
c.schedulerUpdateLeader()
select {
case <-time.After(retryInterval):
continue
case <-ctx.Done():
return ctx.Err()
}
}
return errors.New("failed too many times")
}
func (c *client) heartbeatStreamLoop() {
defer c.wg.Done()
for {
select {
case <-c.ctx.Done():
return
default:
}
ctx, cancel := context.WithCancel(c.ctx)
c.connMu.RLock()
stream, err := c.leaderClient().RegionHeartbeat(ctx)
c.connMu.RUnlock()
if err != nil {
cancel()
c.schedulerUpdateLeader()
time.Sleep(retryInterval)
continue
}
errCh := make(chan error, 1)
wg := &sync.WaitGroup{}
wg.Add(2)
go c.reportRegionHeartbeat(ctx, stream, errCh, wg)
go c.receiveRegionHeartbeat(stream, errCh, wg)
select {
case err := <-errCh:
log.Warnf("[%s][scheduler] heartbeat stream get error: %s ", c.tag, err)
cancel()
c.schedulerUpdateLeader()
time.Sleep(retryInterval)
wg.Wait()
case <-c.ctx.Done():
log.Info("cancel heartbeat stream loop")
cancel()
return
}
}
}
func (c *client) receiveRegionHeartbeat(stream schedulerpb.Scheduler_RegionHeartbeatClient, errCh chan error, wg *sync.WaitGroup) {
defer wg.Done()
for {
resp, err := stream.Recv()
if err != nil {
errCh <- err
return
}
if h := c.heartbeatHandler.Load(); h != nil {
h.(func(*schedulerpb.RegionHeartbeatResponse))(resp)
}
}
}
func (c *client) reportRegionHeartbeat(ctx context.Context, stream schedulerpb.Scheduler_RegionHeartbeatClient, errCh chan error, wg *sync.WaitGroup) {
defer wg.Done()
for {
request, ok := c.getNextHeartbeatRequest(ctx)
if !ok {
return
}
request.Header = c.requestHeader()
err := stream.Send(request)
if err != nil {
c.pendingRequest = request
errCh <- err
return
}
}
}
func (c *client) getNextHeartbeatRequest(ctx context.Context) (*schedulerpb.RegionHeartbeatRequest, bool) {
if c.pendingRequest != nil {
req := c.pendingRequest
c.pendingRequest = nil
return req, true
}
select {
case <-ctx.Done():
return nil, false
case request, ok := <-c.regionCh:
if !ok {
return nil, false
}
return request, true
}
}
func (c *client) Close() {
c.cancel()
c.wg.Wait()
c.connMu.Lock()
defer c.connMu.Unlock()
for _, cc := range c.connMu.clientConns {
cc.Close()
}
}
func (c *client) GetClusterID(context.Context) uint64 {
return c.clusterID
}
func (c *client) AllocID(ctx context.Context) (uint64, error) {
var resp *schedulerpb.AllocIDResponse
err := c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.AllocID(ctx, &schedulerpb.AllocIDRequest{
Header: c.requestHeader(),
})
return err1
})
if err != nil {
return 0, err
}
return resp.GetId(), nil
}
func (c *client) Bootstrap(ctx context.Context, store *metapb.Store) (resp *schedulerpb.BootstrapResponse, err error) {
err = c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.Bootstrap(ctx, &schedulerpb.BootstrapRequest{
Header: c.requestHeader(),
Store: store,
})
return err1
})
return resp, err
}
func (c *client) IsBootstrapped(ctx context.Context) (bool, error) {
var resp *schedulerpb.IsBootstrappedResponse
err := c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.IsBootstrapped(ctx, &schedulerpb.IsBootstrappedRequest{Header: c.requestHeader()})
return err1
})
if err != nil {
return false, err
}
if herr := resp.Header.GetError(); herr != nil {
return false, errors.New(herr.String())
}
return resp.Bootstrapped, nil
}
func (c *client) PutStore(ctx context.Context, store *metapb.Store) error {
var resp *schedulerpb.PutStoreResponse
err := c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.PutStore(ctx, &schedulerpb.PutStoreRequest{
Header: c.requestHeader(),
Store: store,
})
return err1
})
if err != nil {
return err
}
if herr := resp.Header.GetError(); herr != nil {
return errors.New(herr.String())
}
return nil
}
func (c *client) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error) {
var resp *schedulerpb.GetStoreResponse
err := c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.GetStore(ctx, &schedulerpb.GetStoreRequest{
Header: c.requestHeader(),
StoreId: storeID,
})
return err1
})
if err != nil {
return nil, err
}
if herr := resp.Header.GetError(); herr != nil {
return nil, errors.New(herr.String())
}
return resp.Store, nil
}
func (c *client) GetRegion(ctx context.Context, key []byte) (*metapb.Region, *metapb.Peer, error) {
var resp *schedulerpb.GetRegionResponse
err := c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.GetRegion(ctx, &schedulerpb.GetRegionRequest{
Header: c.requestHeader(),
RegionKey: key,
})
return err1
})
if err != nil {
return nil, nil, err
}
if herr := resp.Header.GetError(); herr != nil {
return nil, nil, errors.New(herr.String())
}
return resp.Region, resp.Leader, nil
}
func (c *client) GetRegionByID(ctx context.Context, regionID uint64) (*metapb.Region, *metapb.Peer, error) {
var resp *schedulerpb.GetRegionResponse
err := c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.GetRegionByID(ctx, &schedulerpb.GetRegionByIDRequest{
Header: c.requestHeader(),
RegionId: regionID,
})
return err1
})
if err != nil {
return nil, nil, err
}
if herr := resp.Header.GetError(); herr != nil {
return nil, nil, errors.New(herr.String())
}
return resp.Region, resp.Leader, nil
}
func (c *client) AskSplit(ctx context.Context, region *metapb.Region) (resp *schedulerpb.AskSplitResponse, err error) {
err = c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.AskSplit(ctx, &schedulerpb.AskSplitRequest{
Header: c.requestHeader(),
Region: region,
})
return err1
})
if err != nil {
return nil, err
}
if herr := resp.Header.GetError(); herr != nil {
return nil, errors.New(herr.String())
}
return resp, nil
}
func (c *client) StoreHeartbeat(ctx context.Context, stats *schedulerpb.StoreStats) error {
var resp *schedulerpb.StoreHeartbeatResponse
err := c.doRequest(ctx, func(ctx context.Context, client schedulerpb.SchedulerClient) error {
var err1 error
resp, err1 = client.StoreHeartbeat(ctx, &schedulerpb.StoreHeartbeatRequest{
Header: c.requestHeader(),
Stats: stats,
})
return err1
})
if err != nil {
return err
}
if herr := resp.Header.GetError(); herr != nil {
return errors.New(herr.String())
}
return nil
}
func (c *client) RegionHeartbeat(request *schedulerpb.RegionHeartbeatRequest) error {
c.regionCh <- request
return nil
}
func (c *client) SetRegionHeartbeatResponseHandler(_ uint64, h func(*schedulerpb.RegionHeartbeatResponse)) {
if h == nil {
h = func(*schedulerpb.RegionHeartbeatResponse) {}
}
c.heartbeatHandler.Store(h)
}
func (c *client) requestHeader() *schedulerpb.RequestHeader {
return &schedulerpb.RequestHeader{
ClusterId: c.clusterID,
}
}

783
kv/raftstore/snap/snap.go Normal file
View File

@ -0,0 +1,783 @@
package snap
import (
"fmt"
"hash"
"hash/crc32"
"io"
"os"
"path/filepath"
"strings"
"sync/atomic"
"time"
"github.com/Connor1996/badger"
"github.com/Connor1996/badger/table"
"github.com/pingcap-incubator/tinykv/kv/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap/errors"
)
type SnapStateType int
const (
SnapState_Relax SnapStateType = 0 + iota
SnapState_Generating
SnapState_Applying
)
type SnapState struct {
StateType SnapStateType
Receiver chan *eraftpb.Snapshot
}
const (
snapGenPrefix = "gen" // Name prefix for the self-generated snapshot file.
snapRevPrefix = "rev" // Name prefix for the received snapshot file.
sstFileSuffix = ".sst"
tmpFileSuffix = ".tmp"
cloneFileSuffix = ".clone"
metaFileSuffix = ".meta"
deleteRetryMaxTime = 6
deleteRetryDuration = 500 * time.Millisecond
)
type ApplySnapAbortError string
func (e ApplySnapAbortError) Error() string {
return string(e)
}
var (
errAbort = ApplySnapAbortError("abort")
)
type SnapKeyWithSending struct {
SnapKey SnapKey
IsSending bool
}
type SnapKey struct {
RegionID uint64
Term uint64
Index uint64
}
func (k SnapKey) String() string {
return fmt.Sprintf("%d_%d_%d", k.RegionID, k.Term, k.Index)
}
func SnapKeyFromRegionSnap(regionID uint64, snap *eraftpb.Snapshot) SnapKey {
return SnapKey{
RegionID: regionID,
Term: snap.Metadata.Term,
Index: snap.Metadata.Index,
}
}
func SnapKeyFromSnap(snap *eraftpb.Snapshot) (SnapKey, error) {
data := new(rspb.RaftSnapshotData)
err := data.Unmarshal(snap.Data)
if err != nil {
return SnapKey{}, err
}
return SnapKeyFromRegionSnap(data.Region.Id, snap), nil
}
type SnapStatistics struct {
Size uint64
KVCount int
}
type ApplyOptions struct {
DB *badger.DB
Region *metapb.Region
}
func NewApplyOptions(db *badger.DB, region *metapb.Region) *ApplyOptions {
return &ApplyOptions{
DB: db,
Region: region,
}
}
// `Snapshot` is the interface for a region snapshot.
// It's used in these scenarios:
// 1. build local snapshot
// 2. read local snapshot and then replicate it to remote raftstores
// 3. receive snapshot from remote raftstore and write it to local storage
// 4. apply snapshot
// 5. snapshot gc
type Snapshot interface {
io.Reader
io.Writer
Build(dbSnap *badger.Txn, region *metapb.Region, snapData *rspb.RaftSnapshotData, stat *SnapStatistics, deleter SnapshotDeleter) error
Path() string
Exists() bool
Delete()
Meta() (os.FileInfo, error)
TotalSize() uint64
Save() error
Apply(option ApplyOptions) error
}
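// Illustrative sketch (not part of the original code): a flow covering
// scenarios 1-3 above, assuming a *SnapManager `mgr`, a badger transaction
// `txn`, a `region`, a SnapKey `key` and the marshaled snapshot data `raw`
// are available:
//
//	s, _ := mgr.GetSnapshotForBuilding(key)          // 1. build local snapshot
//	data := &rspb.RaftSnapshotData{Region: region}
//	stat := &SnapStatistics{}
//	_ = s.Build(txn, region, data, stat, mgr)
//	recv, _ := mgr.GetSnapshotForReceiving(key, raw) // 3. receive on the target
//	_, _ = io.Copy(recv, s)                          // 2. read and replicate
//	_ = recv.Save()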
// `SnapshotDeleter` is an interface for deleting snapshots.
// It ensures that snapshot deletion happens under a lock,
// avoiding races between concurrent reads and writes.
type SnapshotDeleter interface {
// DeleteSnapshot returns true if it successfully deletes the specified snapshot.
DeleteSnapshot(key SnapKey, snapshot Snapshot, checkEntry bool) bool
}
func retryDeleteSnapshot(deleter SnapshotDeleter, key SnapKey, snap Snapshot) bool {
for i := 0; i < deleteRetryMaxTime; i++ {
if deleter.DeleteSnapshot(key, snap, true) {
return true
}
time.Sleep(deleteRetryDuration)
}
return false
}
func genSnapshotMeta(cfFiles []*CFFile) (*rspb.SnapshotMeta, error) {
cfMetas := make([]*rspb.SnapshotCFFile, 0, len(engine_util.CFs))
for _, cfFile := range cfFiles {
var found bool
for _, snapCF := range engine_util.CFs {
if snapCF == cfFile.CF {
found = true
break
}
}
if !found {
return nil, errors.Errorf("failed to encode invalid snapshot CF %s", cfFile.CF)
}
cfMeta := &rspb.SnapshotCFFile{
Cf: cfFile.CF,
Size_: cfFile.Size,
Checksum: cfFile.Checksum,
}
cfMetas = append(cfMetas, cfMeta)
}
return &rspb.SnapshotMeta{
CfFiles: cfMetas,
}, nil
}
func checkFileSize(path string, expectedSize uint64) error {
size, err := util.GetFileSize(path)
if err != nil {
return err
}
if size != expectedSize {
return errors.Errorf("invalid size %d for snapshot cf file %s, expected %d", size, path, expectedSize)
}
return nil
}
func checkFileChecksum(path string, expectedChecksum uint32) error {
checksum, err := util.CalcCRC32(path)
if err != nil {
return err
}
if checksum != expectedChecksum {
return errors.Errorf("invalid checksum %d for snapshot cf file %s, expected %d",
checksum, path, expectedChecksum)
}
return nil
}
func checkFileSizeAndChecksum(path string, expectedSize uint64, expectedChecksum uint32) error {
err := checkFileSize(path, expectedSize)
if err == nil {
err = checkFileChecksum(path, expectedChecksum)
}
return err
}
type CFFile struct {
CF string
Path string
TmpPath string
SstWriter *table.Builder
File *os.File
KVCount int
Size uint64
WrittenSize uint64
Checksum uint32
WriteDigest hash.Hash32
}
type MetaFile struct {
Meta *rspb.SnapshotMeta
Path string
File *os.File
// for writing snapshot
TmpPath string
}
var _ Snapshot = new(Snap)
type Snap struct {
key SnapKey
displayPath string
CFFiles []*CFFile
cfIndex int
MetaFile *MetaFile
SizeTrack *int64
holdTmpFiles bool
}
func NewSnap(dir string, key SnapKey, sizeTrack *int64, isSending, toBuild bool,
deleter SnapshotDeleter) (*Snap, error) {
if !util.DirExists(dir) {
err := os.MkdirAll(dir, 0700)
if err != nil {
return nil, errors.WithStack(err)
}
}
var snapPrefix string
if isSending {
snapPrefix = snapGenPrefix
} else {
snapPrefix = snapRevPrefix
}
prefix := fmt.Sprintf("%s_%s", snapPrefix, key)
displayPath := getDisplayPath(dir, prefix)
cfFiles := make([]*CFFile, 0, len(engine_util.CFs))
for _, cf := range engine_util.CFs {
fileName := fmt.Sprintf("%s_%s%s", prefix, cf, sstFileSuffix)
path := filepath.Join(dir, fileName)
tmpPath := path + tmpFileSuffix
cfFile := &CFFile{
CF: cf,
Path: path,
TmpPath: tmpPath,
}
cfFiles = append(cfFiles, cfFile)
}
metaFileName := fmt.Sprintf("%s%s", prefix, metaFileSuffix)
metaFilePath := filepath.Join(dir, metaFileName)
metaTmpPath := metaFilePath + tmpFileSuffix
metaFile := &MetaFile{
Path: metaFilePath,
TmpPath: metaTmpPath,
}
s := &Snap{
key: key,
displayPath: displayPath,
CFFiles: cfFiles,
MetaFile: metaFile,
SizeTrack: sizeTrack,
}
// load snapshot meta if meta file exists.
if util.FileExists(metaFile.Path) {
err := s.loadSnapMeta()
if err != nil {
if !toBuild {
return nil, err
}
log.Warnf("failed to load existing snapshot meta when trying to build %s: %v", s.Path(), err)
if !retryDeleteSnapshot(deleter, key, s) {
log.Warnf("failed to delete snapshot %s because it's already registered elsewhere", s.Path())
return nil, err
}
}
}
return s, nil
}
func NewSnapForBuilding(dir string, key SnapKey, sizeTrack *int64, deleter SnapshotDeleter) (*Snap, error) {
s, err := NewSnap(dir, key, sizeTrack, true, true, deleter)
if err != nil {
return nil, err
}
err = s.initForBuilding()
if err != nil {
return nil, err
}
return s, nil
}
func NewSnapForSending(dir string, key SnapKey, sizeTrack *int64, deleter SnapshotDeleter) (*Snap, error) {
s, err := NewSnap(dir, key, sizeTrack, true, false, deleter)
if err != nil {
return nil, err
}
if !s.Exists() {
// Skip the initialization below if the snapshot doesn't exist.
return s, nil
}
for _, cfFile := range s.CFFiles {
// initialize cf file size and reader
if cfFile.Size > 0 {
cfFile.File, err = os.Open(cfFile.Path)
if err != nil {
return nil, errors.WithStack(err)
}
}
}
return s, nil
}
func NewSnapForReceiving(dir string, key SnapKey, snapshotMeta *rspb.SnapshotMeta,
sizeTrack *int64, deleter SnapshotDeleter) (*Snap, error) {
s, err := NewSnap(dir, key, sizeTrack, false, false, deleter)
if err != nil {
return nil, err
}
err = s.setSnapshotMeta(snapshotMeta)
if err != nil {
return nil, err
}
if s.Exists() {
return s, nil
}
f, err := os.OpenFile(s.MetaFile.TmpPath, os.O_CREATE|os.O_WRONLY, 0600)
if err != nil {
return nil, err
}
s.MetaFile.File = f
s.holdTmpFiles = true
for _, cfFile := range s.CFFiles {
if cfFile.Size == 0 {
continue
}
f, err = os.OpenFile(cfFile.TmpPath, os.O_CREATE|os.O_WRONLY, 0600)
if err != nil {
return nil, err
}
cfFile.File = f
cfFile.WriteDigest = crc32.NewIEEE()
}
return s, nil
}
func NewSnapForApplying(dir string, key SnapKey, sizeTrack *int64, deleter SnapshotDeleter) (*Snap, error) {
return NewSnap(dir, key, sizeTrack, false, false, deleter)
}
func (s *Snap) initForBuilding() error {
if s.Exists() {
return nil
}
file, err := os.OpenFile(s.MetaFile.TmpPath, os.O_CREATE|os.O_WRONLY, 0600)
if err != nil {
return err
}
s.MetaFile.File = file
s.holdTmpFiles = true
for _, cfFile := range s.CFFiles {
file, err = os.OpenFile(cfFile.TmpPath, os.O_CREATE|os.O_WRONLY, 0600)
if err != nil {
return err
}
cfFile.SstWriter = table.NewExternalTableBuilder(file, nil, badger.DefaultOptions.TableBuilderOptions)
}
return nil
}
func (s *Snap) readSnapshotMeta() (*rspb.SnapshotMeta, error) {
fi, err := os.Stat(s.MetaFile.Path)
if err != nil {
return nil, errors.WithStack(err)
}
file, err := os.Open(s.MetaFile.Path)
if err != nil {
return nil, errors.WithStack(err)
}
size := fi.Size()
buf := make([]byte, size)
_, err = io.ReadFull(file, buf)
if err != nil {
return nil, errors.WithStack(err)
}
snapshotMeta := new(rspb.SnapshotMeta)
err = snapshotMeta.Unmarshal(buf)
if err != nil {
return nil, errors.WithStack(err)
}
return snapshotMeta, nil
}
func (s *Snap) setSnapshotMeta(snapshotMeta *rspb.SnapshotMeta) error {
if len(snapshotMeta.CfFiles) != len(s.CFFiles) {
return errors.Errorf("invalid CF number of snapshot meta, expect %d, got %d",
len(s.CFFiles), len(snapshotMeta.CfFiles))
}
for i, cfFile := range s.CFFiles {
meta := snapshotMeta.CfFiles[i]
if meta.Cf != cfFile.CF {
return errors.Errorf("invalid %d CF in snapshot meta, expect %s, got %s", i, cfFile.CF, meta.Cf)
}
if util.FileExists(cfFile.Path) {
// Check only the file size so that `Exists()` works correctly.
err := checkFileSize(cfFile.Path, meta.GetSize_())
if err != nil {
return err
}
}
cfFile.Size = uint64(meta.GetSize_())
cfFile.Checksum = meta.GetChecksum()
}
s.MetaFile.Meta = snapshotMeta
return nil
}
func (s *Snap) loadSnapMeta() error {
snapshotMeta, err := s.readSnapshotMeta()
if err != nil {
return err
}
err = s.setSnapshotMeta(snapshotMeta)
if err != nil {
return err
}
// check if there is a data corruption when the meta file exists
// but cf files are deleted.
if !s.Exists() {
return errors.Errorf("snapshot %s is corrupted, some cf file is missing", s.Path())
}
return nil
}
func getDisplayPath(dir string, prefix string) string {
cfNames := "(" + strings.Join(engine_util.CFs[:], "|") + ")"
return fmt.Sprintf("%s/%s_%s%s", dir, prefix, cfNames, sstFileSuffix)
}
func (s *Snap) validate() error {
for _, cfFile := range s.CFFiles {
if cfFile.Size == 0 {
// Skip empty file. The checksum of this cf file should be 0 and
// this is checked when loading the snapshot meta.
continue
}
// TODO: prepare and validate for ingestion
}
return nil
}
func (s *Snap) saveCFFiles() error {
for _, cfFile := range s.CFFiles {
if cfFile.KVCount > 0 {
err := cfFile.SstWriter.Finish()
if err != nil {
return err
}
}
cfFile.SstWriter.Close()
size, err := util.GetFileSize(cfFile.TmpPath)
if err != nil {
return err
}
if size > 0 {
err = os.Rename(cfFile.TmpPath, cfFile.Path)
if err != nil {
return errors.WithStack(err)
}
cfFile.Size = size
// add size
atomic.AddInt64(s.SizeTrack, int64(size))
cfFile.Checksum, err = util.CalcCRC32(cfFile.Path)
if err != nil {
return err
}
} else {
// Clean up the `TmpPath` if this cf file is empty.
_, err = util.DeleteFileIfExists(cfFile.TmpPath)
if err != nil {
return err
}
}
}
return nil
}
func (s *Snap) saveMetaFile() error {
bin, err := s.MetaFile.Meta.Marshal()
if err != nil {
return errors.WithStack(err)
}
_, err = s.MetaFile.File.Write(bin)
if err != nil {
return errors.WithStack(err)
}
err = os.Rename(s.MetaFile.TmpPath, s.MetaFile.Path)
if err != nil {
return errors.WithStack(err)
}
s.holdTmpFiles = false
return nil
}
func (s *Snap) Build(dbSnap *badger.Txn, region *metapb.Region, snapData *rspb.RaftSnapshotData, stat *SnapStatistics, deleter SnapshotDeleter) error {
if s.Exists() {
err := s.validate()
if err == nil {
// set snapshot meta data
snapData.FileSize = s.TotalSize()
snapData.Meta = s.MetaFile.Meta
return nil
}
log.Errorf("[region %d] file %s is corrupted, will rebuild: %v", region.Id, s.Path(), err)
if !retryDeleteSnapshot(deleter, s.key, s) {
log.Errorf("[region %d] failed to delete corrupted snapshot %s because it's already registered elsewhere",
region.Id, s.Path())
return err
}
err = s.initForBuilding()
if err != nil {
return err
}
}
builder := newSnapBuilder(s.CFFiles, dbSnap, region)
err := builder.build()
if err != nil {
return err
}
log.Infof("region %d scan snapshot %s, key count %d, size %d", region.Id, s.Path(), builder.kvCount, builder.size)
err = s.saveCFFiles()
if err != nil {
return err
}
stat.KVCount = builder.kvCount
snapshotMeta, err := genSnapshotMeta(s.CFFiles)
if err != nil {
return err
}
s.MetaFile.Meta = snapshotMeta
err = s.saveMetaFile()
if err != nil {
return err
}
totalSize := s.TotalSize()
stat.Size = totalSize
// set snapshot meta data
snapData.FileSize = totalSize
snapData.Meta = s.MetaFile.Meta
return nil
}
func (s *Snap) Path() string {
return s.displayPath
}
func (s *Snap) Exists() bool {
for _, cfFile := range s.CFFiles {
if cfFile.Size > 0 && !util.FileExists(cfFile.Path) {
return false
}
}
return util.FileExists(s.MetaFile.Path)
}
func (s *Snap) Delete() {
log.Debugf("deleting %s", s.Path())
for _, cfFile := range s.CFFiles {
if s.holdTmpFiles {
_, err := util.DeleteFileIfExists(cfFile.TmpPath)
if err != nil {
panic(err)
}
}
deleted, err := util.DeleteFileIfExists(cfFile.Path)
if err != nil {
panic(err)
}
if deleted {
atomic.AddInt64(s.SizeTrack, -int64(cfFile.Size))
}
}
_, err := util.DeleteFileIfExists(s.MetaFile.Path)
if err != nil {
panic(err)
}
if s.holdTmpFiles {
_, err := util.DeleteFileIfExists(s.MetaFile.TmpPath)
if err != nil {
panic(err)
}
}
}
func (s *Snap) Meta() (os.FileInfo, error) {
fi, err := os.Stat(s.MetaFile.Path)
if err != nil {
return nil, errors.WithStack(err)
}
return fi, nil
}
func (s *Snap) TotalSize() (total uint64) {
for _, cf := range s.CFFiles {
total += cf.Size
}
return
}
func (s *Snap) Save() error {
log.Debugf("saving to %s", s.MetaFile.Path)
for _, cfFile := range s.CFFiles {
if cfFile.Size == 0 {
// skip empty cf file.
continue
}
// Check each cf file has been fully written, and the checksum matches.
if cfFile.WrittenSize != cfFile.Size {
return errors.Errorf("snapshot file %s for CF %s size mismatch, real size %d, expected %d",
cfFile.Path, cfFile.CF, cfFile.WrittenSize, cfFile.Size)
}
checksum := cfFile.WriteDigest.Sum32()
if cfFile.Checksum != checksum {
return errors.Errorf("snapshot file %s for CF %s checksum mismatch, real checksum %d, expected %d",
cfFile.Path, cfFile.CF, checksum, cfFile.Checksum)
}
err := os.Rename(cfFile.TmpPath, cfFile.Path)
if err != nil {
return errors.WithStack(err)
}
atomic.AddInt64(s.SizeTrack, int64(cfFile.Size))
}
// write meta file
bin, err := s.MetaFile.Meta.Marshal()
if err != nil {
return errors.WithStack(err)
}
_, err = s.MetaFile.File.Write(bin)
if err != nil {
return errors.WithStack(err)
}
err = s.MetaFile.File.Sync()
if err != nil {
return errors.WithStack(err)
}
err = os.Rename(s.MetaFile.TmpPath, s.MetaFile.Path)
if err != nil {
return errors.WithStack(err)
}
s.holdTmpFiles = false
return nil
}
func (s *Snap) Apply(opts ApplyOptions) error {
err := s.validate()
if err != nil {
return err
}
externalFiles := make([]*os.File, 0, len(s.CFFiles))
for _, cfFile := range s.CFFiles {
if cfFile.Size == 0 {
// Skip empty cf file
continue
}
file, err := os.Open(cfFile.Path)
if err != nil {
log.Errorf("open ingest file %s failed: %s", cfFile.Path, err)
return err
}
externalFiles = append(externalFiles, file)
}
n, err := opts.DB.IngestExternalFiles(externalFiles)
if err != nil {
log.Errorf("ingest sst failed (first %d files succeeded): %s", n, err)
return err
}
log.Infof("apply snapshot ingested %d tables", n)
return nil
}
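// Read streams the snapshot's cf files one after another in CFFiles order,
// skipping empty files; it is the sending side of scenario 2 in the
// `Snapshot` interface comment.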
func (s *Snap) Read(b []byte) (int, error) {
if len(b) == 0 {
return 0, nil
}
for s.cfIndex < len(s.CFFiles) {
cfFile := s.CFFiles[s.cfIndex]
if cfFile.Size == 0 {
s.cfIndex++
continue
}
n, err := cfFile.File.Read(b)
if n > 0 {
return n, nil
}
if err != nil {
if err == io.EOF {
s.cfIndex++
continue
}
return 0, errors.WithStack(err)
}
}
return 0, io.EOF
}
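// Write is the receiving-side counterpart of Read: incoming bytes are appended
// to the cf files in order, each file accepting exactly cfFile.Size bytes as
// recorded in the snapshot meta, while WriteDigest accumulates the checksum
// that Save verifies later.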
func (s *Snap) Write(b []byte) (int, error) {
if len(b) == 0 {
return 0, nil
}
nextBuf := b
for s.cfIndex < len(s.CFFiles) {
cfFile := s.CFFiles[s.cfIndex]
if cfFile.Size == 0 {
s.cfIndex++
continue
}
left := cfFile.Size - cfFile.WrittenSize
if left == 0 {
s.cfIndex++
continue
}
file := cfFile.File
digest := cfFile.WriteDigest
if len(nextBuf) > int(left) {
_, err := file.Write(nextBuf[:left])
if err != nil {
return 0, errors.WithStack(err)
}
digest.Write(nextBuf[:left])
cfFile.WrittenSize += left
s.cfIndex++
nextBuf = nextBuf[left:]
} else {
_, err := file.Write(nextBuf)
if err != nil {
return 0, errors.WithStack(err)
}
digest.Write(nextBuf)
cfFile.WrittenSize += uint64(len(nextBuf))
return len(b), nil
}
}
return len(b) - len(nextBuf), nil
}
func (s *Snap) Drop() {
var cfTmpFileExists bool
for _, cfFile := range s.CFFiles {
// Clean up if any of the cf files or the meta file is partially written.
if util.FileExists(cfFile.TmpPath) {
cfTmpFileExists = true
break
}
}
if cfTmpFileExists || util.FileExists(s.MetaFile.TmpPath) {
s.Delete()
return
}
// Clean up if data corruption happened and some file is missing.
if !s.Exists() {
s.Delete()
}
}

View File

@ -0,0 +1,60 @@
package snap
import (
"github.com/Connor1996/badger"
"github.com/Connor1996/badger/y"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
)
// snapBuilder builds snapshot files.
type snapBuilder struct {
region *metapb.Region
txn *badger.Txn
cfFiles []*CFFile
kvCount int
size int
}
func newSnapBuilder(cfFiles []*CFFile, dbSnap *badger.Txn, region *metapb.Region) *snapBuilder {
return &snapBuilder{
region: region,
cfFiles: cfFiles,
txn: dbSnap,
}
}
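// build scans the region's key range once per column family, writes every key
// (prefixed with its cf name) and value into the corresponding SST writer, and
// accumulates per-file key counts and sizes.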
func (b *snapBuilder) build() error {
defer b.txn.Discard()
startKey, endKey := b.region.StartKey, b.region.EndKey
for _, file := range b.cfFiles {
cf := file.CF
sstWriter := file.SstWriter
it := engine_util.NewCFIterator(cf, b.txn)
for it.Seek(startKey); it.Valid(); it.Next() {
item := it.Item()
key := item.Key()
if engine_util.ExceedEndKey(key, endKey) {
break
}
value, err := item.Value()
if err != nil {
return err
}
cfKey := engine_util.KeyWithCF(cf, key)
if err := sstWriter.Add(cfKey, y.ValueStruct{
Value: value,
}); err != nil {
return err
}
file.KVCount++
file.Size += uint64(len(cfKey) + len(value))
}
it.Close()
b.kvCount += file.KVCount
b.size += int(file.Size)
}
return nil
}

View File

@ -0,0 +1,345 @@
package snap
import (
"io/ioutil"
"math"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/pingcap-incubator/tinykv/log"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap/errors"
)
type SnapEntry int
const (
SnapEntryGenerating SnapEntry = 1
SnapEntrySending SnapEntry = 2
SnapEntryReceiving SnapEntry = 3
SnapEntryApplying SnapEntry = 4
)
func (e SnapEntry) String() string {
switch e {
case SnapEntryGenerating:
return "generating"
case SnapEntrySending:
return "sending"
case SnapEntryReceiving:
return "receiving"
case SnapEntryApplying:
return "applying"
}
return "unknown"
}
type SnapStats struct {
ReceivingCount int
SendingCount int
}
type SnapManager struct {
base string
snapSize *int64
registryLock sync.RWMutex
registry map[SnapKey][]SnapEntry
MaxTotalSize uint64
}
func NewSnapManager(path string) *SnapManager {
return new(SnapManagerBuilder).Build(path)
}
func (sm *SnapManager) Init() error {
fi, err := os.Stat(sm.base)
if os.IsNotExist(err) {
err = os.MkdirAll(sm.base, 0600)
if err != nil {
return errors.WithStack(err)
}
return nil
} else if err != nil {
return errors.WithStack(err)
}
if !fi.IsDir() {
return errors.Errorf("%s should be a directory", sm.base)
}
fis, err := ioutil.ReadDir(sm.base)
if err != nil {
return errors.WithStack(err)
}
for _, fi := range fis {
if !fi.IsDir() {
name := fi.Name()
if strings.HasSuffix(name, tmpFileSuffix) {
err = os.Remove(filepath.Join(sm.base, name))
if err != nil {
return errors.WithStack(err)
}
} else if strings.HasSuffix(name, sstFileSuffix) {
atomic.AddInt64(sm.snapSize, fi.Size())
}
}
}
return nil
}
func (sm *SnapManager) ListIdleSnap() ([]SnapKeyWithSending, error) {
fis, err := ioutil.ReadDir(sm.base)
if err != nil {
return nil, errors.WithStack(err)
}
results := make([]SnapKeyWithSending, 0, len(fis))
for _, fi := range fis {
if fi.IsDir() {
continue
}
name := fi.Name()
if !strings.HasSuffix(name, metaFileSuffix) {
continue
}
name = name[:len(name)-len(metaFileSuffix)]
var key SnapKeyWithSending
if strings.HasPrefix(name, snapGenPrefix) {
key.IsSending = true
}
numberStrs := strings.Split(name, "_")
if len(numberStrs) != 4 {
return nil, errors.Errorf("failed to parse file %s", name)
}
key.SnapKey.RegionID, err = strconv.ParseUint(numberStrs[1], 10, 64)
if err != nil {
return nil, errors.WithStack(err)
}
key.SnapKey.Term, err = strconv.ParseUint(numberStrs[2], 10, 64)
if err != nil {
return nil, errors.WithStack(err)
}
key.SnapKey.Index, err = strconv.ParseUint(numberStrs[3], 10, 64)
if err != nil {
return nil, errors.WithStack(err)
}
sm.registryLock.RLock()
_, ok := sm.registry[key.SnapKey]
sm.registryLock.RUnlock()
if ok {
// Skip snapshots that are currently registered.
continue
}
results = append(results, key)
}
sort.Slice(results, func(i, j int) bool {
keyI := &results[i].SnapKey
keyJ := &results[j].SnapKey
if keyI.RegionID == keyJ.RegionID {
if keyI.Term == keyJ.Term {
if keyI.Index == keyJ.Index {
return !results[i].IsSending
}
return keyI.Index < keyJ.Index
}
return keyI.Term < keyJ.Term
}
return keyI.RegionID < keyJ.RegionID
})
return results, nil
}
func (sm *SnapManager) HasRegistered(key SnapKey) bool {
sm.registryLock.RLock()
_, ok := sm.registry[key]
sm.registryLock.RUnlock()
return ok
}
func (sm *SnapManager) GetTotalSnapSize() uint64 {
return uint64(atomic.LoadInt64(sm.snapSize))
}
func (sm *SnapManager) GetSnapshotForBuilding(key SnapKey) (Snapshot, error) {
if sm.GetTotalSnapSize() > sm.MaxTotalSize {
err := sm.deleteOldIdleSnaps()
if err != nil {
return nil, err
}
}
return NewSnapForBuilding(sm.base, key, sm.snapSize, sm)
}
func (sm *SnapManager) deleteOldIdleSnaps() error {
idleSnaps, err := sm.ListIdleSnap()
if err != nil {
return err
}
type snapWithModTime struct {
key SnapKey
snap Snapshot
modTime time.Time
}
snaps := make([]snapWithModTime, 0, len(idleSnaps))
for _, idleSnap := range idleSnaps {
if !idleSnap.IsSending {
continue
}
snap, err := sm.GetSnapshotForSending(idleSnap.SnapKey)
if err != nil {
continue
}
fi, err := snap.Meta()
if err != nil {
return err
}
snaps = append(snaps, snapWithModTime{key: idleSnap.SnapKey, snap: snap, modTime: fi.ModTime()})
}
sort.Slice(snaps, func(i, j int) bool {
return snaps[i].modTime.Before(snaps[j].modTime)
})
for sm.GetTotalSnapSize() > sm.MaxTotalSize {
if len(snaps) == 0 {
return errors.New("too many snapshots")
}
oldest := snaps[0]
snaps = snaps[1:]
sm.DeleteSnapshot(oldest.key, oldest.snap, false)
}
return nil
}
func (sm *SnapManager) GetSnapshotForSending(snapKey SnapKey) (Snapshot, error) {
return NewSnapForSending(sm.base, snapKey, sm.snapSize, sm)
}
func (sm *SnapManager) GetSnapshotForReceiving(snapKey SnapKey, data []byte) (Snapshot, error) {
snapshotData := new(rspb.RaftSnapshotData)
err := snapshotData.Unmarshal(data)
if err != nil {
return nil, errors.WithStack(err)
}
return NewSnapForReceiving(sm.base, snapKey, snapshotData.Meta, sm.snapSize, sm)
}
func (sm *SnapManager) GetSnapshotForApplying(snapKey SnapKey) (Snapshot, error) {
snap, err := NewSnapForApplying(sm.base, snapKey, sm.snapSize, sm)
if err != nil {
return nil, err
}
if !snap.Exists() {
return nil, errors.Errorf("snapshot of %s does not exist", snapKey)
}
return snap, nil
}
func (sm *SnapManager) Register(key SnapKey, entry SnapEntry) {
log.Debugf("register key:%s, entry:%d", key, entry)
sm.registryLock.Lock()
defer sm.registryLock.Unlock()
entries, ok := sm.registry[key]
if ok {
for _, e := range entries {
if e == entry {
log.Warnf("%s is registered more than once", key)
return
}
}
}
entries = append(entries, entry)
sm.registry[key] = entries
}
func (sm *SnapManager) Deregister(key SnapKey, entry SnapEntry) {
log.Debugf("deregister key:%s, entry:%s", key, entry)
sm.registryLock.Lock()
defer sm.registryLock.Unlock()
var handled bool
entries, ok := sm.registry[key]
if ok {
for i, e := range entries {
if e == entry {
entries = append(entries[:i], entries[i+1:]...)
handled = true
break
}
}
if handled {
if len(entries) > 0 {
sm.registry[key] = entries
} else {
delete(sm.registry, key)
}
return
}
}
log.Warnf("stale deregister key:%s, entry:%s", key, entry)
}
func (sm *SnapManager) Stats() SnapStats {
sm.registryLock.RLock()
defer sm.registryLock.RUnlock()
var sendingCount, receivingCount int
for _, entries := range sm.registry {
var isSending, isReceiving bool
for _, entry := range entries {
switch entry {
case SnapEntryGenerating, SnapEntrySending:
isSending = true
case SnapEntryReceiving, SnapEntryApplying:
isReceiving = true
}
}
if isSending {
sendingCount++
}
if isReceiving {
receivingCount++
}
}
return SnapStats{SendingCount: sendingCount, ReceivingCount: receivingCount}
}
func (sm *SnapManager) DeleteSnapshot(key SnapKey, snapshot Snapshot, checkEntry bool) bool {
sm.registryLock.Lock()
defer sm.registryLock.Unlock()
if checkEntry {
if e, ok := sm.registry[key]; ok {
if len(e) > 0 {
log.Infof("skip deleting %s since it is still registered, registered entries %v",
snapshot.Path(), e)
return false
}
}
} else if _, ok := sm.registry[key]; ok {
log.Infof("skip deleting %s since it is registered.", snapshot.Path())
return false
}
snapshot.Delete()
return true
}
type SnapManagerBuilder struct {
maxTotalSize uint64
}
func (smb *SnapManagerBuilder) MaxTotalSize(v uint64) *SnapManagerBuilder {
smb.maxTotalSize = v
return smb
}
func (smb *SnapManagerBuilder) Build(path string) *SnapManager {
var maxTotalSize uint64 = math.MaxUint64
if smb.maxTotalSize > 0 {
maxTotalSize = smb.maxTotalSize
}
return &SnapManager{
base: path,
snapSize: new(int64),
registry: map[SnapKey][]SnapEntry{},
MaxTotalSize: maxTotalSize,
}
}
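// Illustrative sketch (not part of the original code): constructing a manager
// with a size cap; the path is an assumption:
//
//	mgr := new(SnapManagerBuilder).MaxTotalSize(10 << 30).Build("/path/to/snap")
//	if err := mgr.Init(); err != nil {
//		// handle error
//	}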

View File

@ -0,0 +1,241 @@
package snap
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
"sync/atomic"
"testing"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
var (
snapTestKey = []byte("tkey")
regionTestBegin = []byte("ta")
regionTestBeginOld = []byte("ua")
regionTestEnd = []byte("tz")
regionTestEndOld = []byte("uz")
)
const (
testWriteBatchSize = 10 * 1024 * 1024
)
type dummyDeleter struct{}
func (d *dummyDeleter) DeleteSnapshot(key SnapKey, snapshot Snapshot, checkEntry bool) bool {
snapshot.Delete()
return true
}
func openDB(t *testing.T, dir string) *badger.DB {
opts := badger.DefaultOptions
opts.Dir = dir
opts.ValueDir = dir
db, err := badger.Open(opts)
require.Nil(t, err)
return db
}
func fillDBData(t *testing.T, db *badger.DB) {
// write some data for multiple cfs.
wb := new(engine_util.WriteBatch)
value := make([]byte, 32)
wb.SetCF(engine_util.CfDefault, snapTestKey, value)
wb.SetCF(engine_util.CfWrite, snapTestKey, value)
wb.SetCF(engine_util.CfLock, snapTestKey, value)
err := wb.WriteToDB(db)
require.Nil(t, err)
}
func getKVCount(t *testing.T, db *badger.DB) int {
count := 0
err := db.View(func(txn *badger.Txn) error {
for _, cf := range engine_util.CFs {
it := engine_util.NewCFIterator(cf, txn)
defer it.Close()
for it.Seek(regionTestBegin); it.Valid(); it.Next() {
if bytes.Compare(it.Item().Key(), regionTestEnd) >= 0 {
break
}
count++
}
}
return nil
})
assert.Nil(t, err)
return count
}
func genTestRegion(regionID, storeID, peerID uint64) *metapb.Region {
return &metapb.Region{
Id: regionID,
StartKey: regionTestBegin,
EndKey: regionTestEnd,
RegionEpoch: &metapb.RegionEpoch{
Version: 1,
ConfVer: 1,
},
Peers: []*metapb.Peer{
{StoreId: storeID, Id: peerID},
},
}
}
func assertEqDB(t *testing.T, expected, actual *badger.DB) {
for _, cf := range engine_util.CFs {
expectedVal := getDBValue(t, expected, cf, snapTestKey)
actualVal := getDBValue(t, actual, cf, snapTestKey)
assert.Equal(t, expectedVal, actualVal)
}
}
func getDBValue(t *testing.T, db *badger.DB, cf string, key []byte) (val []byte) {
val, err := engine_util.GetCF(db, cf, key)
require.Nil(t, err, string(key))
return val
}
func TestSnapGenMeta(t *testing.T) {
cfFiles := make([]*CFFile, 0, len(engine_util.CFs))
for i, cf := range engine_util.CFs {
f := &CFFile{
CF: cf,
Size: 100 * uint64(i+1),
Checksum: 1000 * uint32(i+1),
}
cfFiles = append(cfFiles, f)
}
meta, err := genSnapshotMeta(cfFiles)
require.Nil(t, err)
for i, cfFileMeta := range meta.CfFiles {
assert.Equal(t, cfFileMeta.Cf, cfFiles[i].CF)
assert.Equal(t, cfFileMeta.Size_, cfFiles[i].Size)
assert.Equal(t, cfFileMeta.Checksum, cfFiles[i].Checksum)
}
}
func TestSnapDisplayPath(t *testing.T) {
dir, err := ioutil.TempDir("", "snapshot")
require.Nil(t, err)
defer os.RemoveAll(dir)
key := &SnapKey{1, 1, 2}
prefix := fmt.Sprintf("%s_%s", snapGenPrefix, key)
displayPath := getDisplayPath(dir, prefix)
assert.NotEqual(t, displayPath, "")
}
func TestSnapFile(t *testing.T) {
doTestSnapFile(t, true)
doTestSnapFile(t, false)
}
func doTestSnapFile(t *testing.T, dbHasData bool) {
regionID := uint64(1)
region := genTestRegion(regionID, 1, 1)
dir, err := ioutil.TempDir("", "snapshot")
require.Nil(t, err)
defer os.RemoveAll(dir)
db := openDB(t, dir)
if dbHasData {
fillDBData(t, db)
}
snapDir, err := ioutil.TempDir("", "snapshot")
require.Nil(t, err)
defer os.RemoveAll(snapDir)
key := SnapKey{RegionID: regionID, Term: 1, Index: 1}
sizeTrack := new(int64)
deleter := &dummyDeleter{}
s1, err := NewSnapForBuilding(snapDir, key, sizeTrack, deleter)
require.Nil(t, err)
// Ensure that this snapshot file doesn't exist before being built.
assert.False(t, s1.Exists())
assert.Equal(t, int64(0), atomic.LoadInt64(sizeTrack))
snapData := new(rspb.RaftSnapshotData)
snapData.Region = region
stat := new(SnapStatistics)
assert.Nil(t, s1.Build(db.NewTransaction(false), region, snapData, stat, deleter))
// Ensure that this snapshot file does exist after being built.
assert.True(t, s1.Exists())
totalSize := s1.TotalSize()
// Ensure the `size_track` is modified correctly.
size := atomic.LoadInt64(sizeTrack)
assert.Equal(t, int64(totalSize), size)
assert.Equal(t, int64(stat.Size), size)
if dbHasData {
assert.Equal(t, 3, getKVCount(t, db))
// stat.KVCount is 3: snapTestKey is written once to each of the default, write and lock CFs.
assert.Equal(t, 3, stat.KVCount)
}
// Ensure this snapshot could be read for sending.
s2, err := NewSnapForSending(snapDir, key, sizeTrack, deleter)
require.Nil(t, err, errors.ErrorStack(err))
assert.True(t, s2.Exists())
dstDir, err := ioutil.TempDir("", "snapshot")
require.Nil(t, err)
defer os.RemoveAll(dstDir)
s3, err := NewSnapForReceiving(dstDir, key, snapData.Meta, sizeTrack, deleter)
require.Nil(t, err)
assert.False(t, s3.Exists())
// Ensure snapshot data could be read out of `s2`, and write into `s3`.
copySize, err := io.Copy(s3, s2)
require.Nil(t, err)
assert.Equal(t, copySize, size)
assert.False(t, s3.Exists())
assert.Nil(t, s3.Save())
assert.True(t, s3.Exists())
// Ensure the tracked size is handled correctly after receiving a snapshot.
assert.Equal(t, atomic.LoadInt64(sizeTrack), size*2)
// Ensure `delete()` works to delete the source snapshot.
s2.Delete()
assert.False(t, s2.Exists())
assert.False(t, s1.Exists())
assert.Equal(t, atomic.LoadInt64(sizeTrack), size)
// Ensure a snapshot could be applied to DB.
s4, err := NewSnapForApplying(dstDir, key, sizeTrack, deleter)
require.Nil(t, err)
assert.True(t, s4.Exists())
dstDBDir, err := ioutil.TempDir("", "snapshot")
require.Nil(t, err)
defer os.RemoveAll(dstDBDir)
dstDB := openDB(t, dstDBDir)
opts := ApplyOptions{
DB: dstDB,
Region: region,
}
err = s4.Apply(opts)
require.Nil(t, err, errors.ErrorStack(err))
// Ensure `delete()` works to delete the dest snapshot.
s4.Delete()
assert.False(t, s4.Exists())
assert.False(t, s3.Exists())
assert.Equal(t, atomic.LoadInt64(sizeTrack), int64(0))
// Verify the data is correct after applying snapshot.
if dbHasData {
assertEqDB(t, db, dstDB)
}
}

@@ -0,0 +1,309 @@
package raftstore
import (
"sync"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/raftstore/runner"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/schedulerpb"
"github.com/pingcap/errors"
)
type StoreTick int
const (
StoreTickSchedulerStoreHeartbeat StoreTick = 1
StoreTickSnapGC StoreTick = 2
)
type storeState struct {
id uint64
receiver <-chan message.Msg
ticker *ticker
}
func newStoreState(cfg *config.Config) (chan<- message.Msg, *storeState) {
ch := make(chan message.Msg, 40960)
state := &storeState{
receiver: (<-chan message.Msg)(ch),
ticker: newStoreTicker(cfg),
}
return (chan<- message.Msg)(ch), state
}
// storeWorker runs store commands.
type storeWorker struct {
*storeState
ctx *GlobalContext
}
func newStoreWorker(ctx *GlobalContext, state *storeState) *storeWorker {
return &storeWorker{
storeState: state,
ctx: ctx,
}
}
func (sw *storeWorker) run(closeCh <-chan struct{}, wg *sync.WaitGroup) {
defer wg.Done()
for {
var msg message.Msg
select {
case <-closeCh:
return
case msg = <-sw.receiver:
}
sw.handleMsg(msg)
}
}
func (d *storeWorker) onTick(tick StoreTick) {
switch tick {
case StoreTickSchedulerStoreHeartbeat:
d.onSchedulerStoreHearbeatTick()
case StoreTickSnapGC:
d.onSnapMgrGC()
}
}
func (d *storeWorker) handleMsg(msg message.Msg) {
switch msg.Type {
case message.MsgTypeStoreRaftMessage:
if err := d.onRaftMessage(msg.Data.(*rspb.RaftMessage)); err != nil {
log.Errorf("handle raft message failed storeID %d, %v", d.id, err)
}
case message.MsgTypeStoreTick:
d.onTick(msg.Data.(StoreTick))
case message.MsgTypeStoreStart:
d.start(msg.Data.(*metapb.Store))
}
}
func (d *storeWorker) start(store *metapb.Store) {
d.id = store.Id
d.ticker.scheduleStore(StoreTickSchedulerStoreHeartbeat)
d.ticker.scheduleStore(StoreTickSnapGC)
}
/// Checks if the message is targeting a stale peer.
///
/// A return value of true means the message can be dropped silently.
func (d *storeWorker) checkMsg(msg *rspb.RaftMessage) (bool, error) {
regionID := msg.GetRegionId()
fromEpoch := msg.GetRegionEpoch()
msgType := msg.Message.MsgType
isVoteMsg := util.IsVoteMessage(msg.Message)
fromStoreID := msg.FromPeer.StoreId
// Check whether the target peer is in tombstone state.
stateKey := meta.RegionStateKey(regionID)
localState := new(rspb.RegionLocalState)
err := engine_util.GetMeta(d.ctx.engine.Kv, stateKey, localState)
if err != nil {
if err == badger.ErrKeyNotFound {
return false, nil
}
return false, err
}
if localState.State != rspb.PeerState_Tombstone {
// Maybe split, but not registered yet.
if util.IsFirstVoteMessage(msg.Message) {
meta := d.ctx.storeMeta
// Last check on whether the target peer has been created; otherwise the
// vote message will never be consumed.
if _, ok := meta.regions[regionID]; ok {
return false, nil
}
meta.pendingVotes = append(meta.pendingVotes, msg)
log.Infof("region %d doesn't exist yet, wait for it to be split.", regionID)
return true, nil
}
return false, errors.Errorf("region %d not exists but not tombstone: %s", regionID, localState)
}
log.Debugf("region %d in tombstone state: %s", regionID, localState)
region := localState.Region
regionEpoch := region.RegionEpoch
// The region in this peer is already destroyed
if util.IsEpochStale(fromEpoch, regionEpoch) {
log.Infof("tombstone peer receives a stale message. region_id:%d, from_region_epoch:%s, current_region_epoch:%s, msg_type:%s",
regionID, fromEpoch, regionEpoch, msgType)
notExist := util.FindPeer(region, fromStoreID) == nil
handleStaleMsg(d.ctx.trans, msg, regionEpoch, isVoteMsg && notExist)
return true, nil
}
if fromEpoch.ConfVer == regionEpoch.ConfVer {
return false, errors.Errorf("tombstone peer [epoch: %s] received an invalid message %s, ignore it",
regionEpoch, msgType)
}
return false, nil
}
func (d *storeWorker) onRaftMessage(msg *rspb.RaftMessage) error {
regionID := msg.RegionId
if err := d.ctx.router.send(regionID, message.Msg{Type: message.MsgTypeRaftMessage, Data: msg}); err == nil {
return nil
}
log.Debugf("handle raft message. from_peer:%d, to_peer:%d, store:%d, region:%d, msg:%+v",
msg.FromPeer.Id, msg.ToPeer.Id, d.storeState.id, regionID, msg.Message)
if msg.ToPeer.StoreId != d.ctx.store.Id {
log.Warnf("store not match, ignore it. store_id:%d, to_store_id:%d, region_id:%d",
d.ctx.store.Id, msg.ToPeer.StoreId, regionID)
return nil
}
if msg.RegionEpoch == nil {
log.Errorf("missing region epoch in raft message, ignore it. region_id:%d", regionID)
return nil
}
if msg.IsTombstone {
// Target tombstone peer doesn't exist, so ignore it.
return nil
}
ok, err := d.checkMsg(msg)
if err != nil {
return err
}
if ok {
return nil
}
created, err := d.maybeCreatePeer(regionID, msg)
if err != nil {
return err
}
if !created {
return nil
}
_ = d.ctx.router.send(regionID, message.Msg{Type: message.MsgTypeRaftMessage, Data: msg})
return nil
}
/// If target peer doesn't exist, create it.
///
/// Returns false to indicate that the target peer is in an invalid state or
/// doesn't exist and can't be created.
func (d *storeWorker) maybeCreatePeer(regionID uint64, msg *rspb.RaftMessage) (bool, error) {
// We may encounter a message with a larger peer id, which means the
// current peer is stale; in that case the current peer should be removed.
meta := d.ctx.storeMeta
if _, ok := meta.regions[regionID]; ok {
return true, nil
}
if !util.IsInitialMsg(msg.Message) {
log.Debugf("target peer %s doesn't exist", msg.ToPeer)
return false, nil
}
for _, region := range meta.getOverlapRegions(&metapb.Region{
StartKey: msg.StartKey,
EndKey: msg.EndKey,
}) {
log.Debugf("msg %s is overlapped with exist region %s", msg, region)
if util.IsFirstVoteMessage(msg.Message) {
meta.pendingVotes = append(meta.pendingVotes, msg)
}
return false, nil
}
peer, err := replicatePeer(
d.ctx.store.Id, d.ctx.cfg, d.ctx.regionTaskSender, d.ctx.engine, regionID, msg.ToPeer)
if err != nil {
return false, err
}
// The following snapshot may overlap with existing regions, so the region should
// only be inserted into region_ranges after the snapshot is applied.
meta.regions[regionID] = peer.Region()
d.ctx.router.register(peer)
_ = d.ctx.router.send(regionID, message.Msg{Type: message.MsgTypeStart})
return true, nil
}
func (d *storeWorker) storeHeartbeatScheduler() {
stats := new(schedulerpb.StoreStats)
stats.StoreId = d.ctx.store.Id
stats.RegionCount = uint32(len(d.ctx.storeMeta.regions))
d.ctx.schedulerTaskSender <- &runner.SchedulerStoreHeartbeatTask{
Stats: stats,
Engine: d.ctx.engine.Kv,
Path: d.ctx.engine.KvPath,
}
}
func (d *storeWorker) onSchedulerStoreHearbeatTick() {
d.storeHeartbeatScheduler()
d.ticker.scheduleStore(StoreTickSchedulerStoreHeartbeat)
}
func (d *storeWorker) handleSnapMgrGC() error {
mgr := d.ctx.snapMgr
snapKeys, err := mgr.ListIdleSnap()
if err != nil {
return err
}
if len(snapKeys) == 0 {
return nil
}
var lastRegionID uint64
var keys []snap.SnapKeyWithSending
for _, pair := range snapKeys {
key := pair.SnapKey
if lastRegionID == key.RegionID {
keys = append(keys, pair)
continue
}
if len(keys) > 0 {
err = d.scheduleGCSnap(lastRegionID, keys)
if err != nil {
return err
}
keys = nil
}
lastRegionID = key.RegionID
keys = append(keys, pair)
}
if len(keys) > 0 {
return d.scheduleGCSnap(lastRegionID, keys)
}
return nil
}
func (d *storeWorker) scheduleGCSnap(regionID uint64, keys []snap.SnapKeyWithSending) error {
gcSnap := message.Msg{Type: message.MsgTypeGcSnap, Data: &message.MsgGCSnap{Snaps: keys}}
if d.ctx.router.send(regionID, gcSnap) != nil {
// The snapshot exists because a MsgAppend was rejected, so the peer must
// have existed. But it is disconnected now, so the peer has to be destroyed
// instead of being created.
log.Infof("region %d is disconnected, remove snaps %v", regionID, keys)
for _, pair := range keys {
key := pair.SnapKey
isSending := pair.IsSending
var snapshot snap.Snapshot
var err error
if isSending {
snapshot, err = d.ctx.snapMgr.GetSnapshotForSending(key)
} else {
snapshot, err = d.ctx.snapMgr.GetSnapshotForApplying(key)
}
if err != nil {
return err
}
d.ctx.snapMgr.DeleteSnapshot(key, snapshot, false)
}
}
return nil
}
func (d *storeWorker) onSnapMgrGC() {
if err := d.handleSnapMgrGC(); err != nil {
log.Errorf("handle snap GC failed store_id %d, err %s", d.storeState.id, err)
}
d.ticker.scheduleStore(StoreTickSnapGC)
}
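The batching inside `handleSnapMgrGC` above is easy to misread: idle snapshot keys are assumed to arrive clustered by region, and each consecutive run of keys for one region becomes a single `MsgTypeGcSnap`. A minimal, self-contained sketch of that batching pattern, with plain integers standing in for `snap.SnapKeyWithSending` (illustration only, not part of the skeleton code):

```go
package main

import "fmt"

func main() {
    // Region IDs of idle snapshots, clustered by region as the GC loop assumes.
    snapRegions := []uint64{1, 1, 2, 2, 2, 5}

    var lastRegionID uint64
    var batch []uint64
    flush := func() {
        if len(batch) > 0 {
            fmt.Printf("schedule GC for region %d: %v\n", lastRegionID, batch)
            batch = nil
        }
    }
    for _, id := range snapRegions {
        if id != lastRegionID {
            flush() // a new region starts, so emit the previous batch
            lastRegionID = id
        }
        batch = append(batch, id)
    }
    flush() // trailing batch, mirroring the final `if len(keys) > 0` above
}
```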

kv/raftstore/ticker.go Normal file
@@ -0,0 +1,130 @@
package raftstore
import (
"time"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
)
type ticker struct {
regionID uint64
tick int64
schedules []tickSchedule
}
type tickSchedule struct {
runAt int64
interval int64
}
func newTicker(regionID uint64, cfg *config.Config) *ticker {
baseInterval := cfg.RaftBaseTickInterval
t := &ticker{
regionID: regionID,
schedules: make([]tickSchedule, 6),
}
t.schedules[int(PeerTickRaft)].interval = 1
t.schedules[int(PeerTickRaftLogGC)].interval = int64(cfg.RaftLogGCTickInterval / baseInterval)
t.schedules[int(PeerTickSplitRegionCheck)].interval = int64(cfg.SplitRegionCheckTickInterval / baseInterval)
t.schedules[int(PeerTickSchedulerHeartbeat)].interval = int64(cfg.SchedulerHeartbeatTickInterval / baseInterval)
return t
}
const SnapMgrGcTickInterval = 1 * time.Minute
func newStoreTicker(cfg *config.Config) *ticker {
baseInterval := cfg.RaftBaseTickInterval
t := &ticker{
schedules: make([]tickSchedule, 4),
}
t.schedules[int(StoreTickSchedulerStoreHeartbeat)].interval = int64(cfg.SchedulerStoreHeartbeatTickInterval / baseInterval)
t.schedules[int(StoreTickSnapGC)].interval = int64(SnapMgrGcTickInterval / baseInterval)
return t
}
// tickClock should be called when peerMsgHandler receives a tick message.
func (t *ticker) tickClock() {
t.tick++
}
// schedule arranges the next run for the given PeerTick.
func (t *ticker) schedule(tp PeerTick) {
sched := &t.schedules[int(tp)]
if sched.interval <= 0 {
sched.runAt = -1
return
}
sched.runAt = t.tick + sched.interval
}
// isOnTick checks if the PeerTick should run.
func (t *ticker) isOnTick(tp PeerTick) bool {
sched := &t.schedules[int(tp)]
return sched.runAt == t.tick
}
func (t *ticker) isOnStoreTick(tp StoreTick) bool {
sched := &t.schedules[int(tp)]
return sched.runAt == t.tick
}
func (t *ticker) scheduleStore(tp StoreTick) {
sched := &t.schedules[int(tp)]
if sched.interval <= 0 {
sched.runAt = -1
return
}
sched.runAt = t.tick + sched.interval
}
type tickDriver struct {
baseTickInterval time.Duration
newRegionCh chan uint64
regions map[uint64]struct{}
router *router
storeTicker *ticker
}
func newTickDriver(baseTickInterval time.Duration, router *router, storeTicker *ticker) *tickDriver {
return &tickDriver{
baseTickInterval: baseTickInterval,
newRegionCh: make(chan uint64),
regions: make(map[uint64]struct{}),
router: router,
storeTicker: storeTicker,
}
}
func (r *tickDriver) run() {
timer := time.Tick(r.baseTickInterval)
for {
select {
case <-timer:
for regionID := range r.regions {
if r.router.send(regionID, message.NewPeerMsg(message.MsgTypeTick, regionID, nil)) != nil {
delete(r.regions, regionID)
}
}
r.tickStore()
case regionID, ok := <-r.newRegionCh:
if !ok {
return
}
r.regions[regionID] = struct{}{}
}
}
}
func (r *tickDriver) stop() {
close(r.newRegionCh)
}
func (r *tickDriver) tickStore() {
r.storeTicker.tickClock()
for i := range r.storeTicker.schedules {
if r.storeTicker.isOnStoreTick(StoreTick(i)) {
r.router.sendStore(message.NewMsg(message.MsgTypeStoreTick, StoreTick(i)))
}
}
}
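The ticker is a purely logical clock: `tickDriver` turns wall-clock time into tick counts, and a schedule fires when the counter reaches its precomputed `runAt`, after which the handler re-arms it (exactly what the `onTick` handlers do via `schedule`/`scheduleStore`). A self-contained sketch of that arithmetic (illustration only):

```go
package main

import "fmt"

type sched struct{ runAt, interval int64 }

func main() {
    var tick int64
    s := sched{interval: 3}     // e.g. RaftLogGCTickInterval / RaftBaseTickInterval
    s.runAt = tick + s.interval // schedule()

    for i := 0; i < 10; i++ {
        tick++               // tickClock(), driven once per base tick interval
        if tick == s.runAt { // isOnTick()
            fmt.Println("fire at tick", tick) // fires at 3, 6, 9
            s.runAt = tick + s.interval       // handlers re-schedule themselves
        }
    }
}
```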

@@ -0,0 +1,81 @@
package util
import (
"fmt"
"github.com/pingcap-incubator/tinykv/proto/pkg/errorpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap/errors"
)
type ErrNotLeader struct {
RegionId uint64
Leader *metapb.Peer
}
func (e *ErrNotLeader) Error() string {
return fmt.Sprintf("region %v is not leader", e.RegionId)
}
type ErrRegionNotFound struct {
RegionId uint64
}
func (e *ErrRegionNotFound) Error() string {
return fmt.Sprintf("region %v is not found", e.RegionId)
}
type ErrKeyNotInRegion struct {
Key []byte
Region *metapb.Region
}
func (e *ErrKeyNotInRegion) Error() string {
return fmt.Sprintf("key %v is not in region %v", e.Key, e.Region)
}
type ErrEpochNotMatch struct {
Message string
Regions []*metapb.Region
}
func (e *ErrEpochNotMatch) Error() string {
return fmt.Sprintf("epoch not match, error msg %v, regions %v", e.Message, e.Regions)
}
type ErrStaleCommand struct{}
func (e *ErrStaleCommand) Error() string {
return fmt.Sprintf("stale command")
}
type ErrStoreNotMatch struct {
RequestStoreId uint64
ActualStoreId uint64
}
func (e *ErrStoreNotMatch) Error() string {
return fmt.Sprintf("store not match, request store id is %v, but actual store id is %v", e.RequestStoreId, e.ActualStoreId)
}
func RaftstoreErrToPbError(e error) *errorpb.Error {
ret := new(errorpb.Error)
switch err := errors.Cause(e).(type) {
case *ErrNotLeader:
ret.NotLeader = &errorpb.NotLeader{RegionId: err.RegionId, Leader: err.Leader}
case *ErrRegionNotFound:
ret.RegionNotFound = &errorpb.RegionNotFound{RegionId: err.RegionId}
case *ErrKeyNotInRegion:
ret.KeyNotInRegion = &errorpb.KeyNotInRegion{Key: err.Key, RegionId: err.Region.Id,
StartKey: err.Region.StartKey, EndKey: err.Region.EndKey}
case *ErrEpochNotMatch:
ret.EpochNotMatch = &errorpb.EpochNotMatch{CurrentRegions: err.Regions}
case *ErrStaleCommand:
ret.StaleCommand = &errorpb.StaleCommand{}
case *ErrStoreNotMatch:
ret.StoreNotMatch = &errorpb.StoreNotMatch{RequestStoreId: err.RequestStoreId, ActualStoreId: err.ActualStoreId}
default:
ret.Message = e.Error()
}
return ret
}
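Because `RaftstoreErrToPbError` switches on `errors.Cause(e)`, a wrapped error still maps to the right protobuf field. A short sketch, assuming this `util` package plus `fmt` and the `Annotate` helper from `github.com/pingcap/errors`:

```go
// Wrapping an ErrRegionNotFound with extra context does not hide it from the
// conversion: errors.Cause unwraps back to the typed error.
wrapped := errors.Annotate(&ErrRegionNotFound{RegionId: 7}, "while proposing a command")
pbErr := RaftstoreErrToPbError(wrapped)
fmt.Println(pbErr.RegionNotFound != nil)   // true
fmt.Println(pbErr.RegionNotFound.RegionId) // 7
```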

@@ -0,0 +1,47 @@
package util
import (
"testing"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestRaftstoreErrToPbError(t *testing.T) {
regionId := uint64(1)
notLeader := &ErrNotLeader{RegionId: regionId, Leader: nil}
pbErr := RaftstoreErrToPbError(notLeader)
require.NotNil(t, pbErr.NotLeader)
assert.Equal(t, pbErr.NotLeader.RegionId, regionId)
regionNotFound := &ErrRegionNotFound{RegionId: regionId}
pbErr = RaftstoreErrToPbError(regionNotFound)
require.NotNil(t, pbErr.RegionNotFound)
assert.Equal(t, pbErr.RegionNotFound.RegionId, regionId)
region := &metapb.Region{Id: regionId, StartKey: []byte{0}, EndKey: []byte{1}}
keyNotInRegion := &ErrKeyNotInRegion{Key: []byte{2}, Region: region}
pbErr = RaftstoreErrToPbError(keyNotInRegion)
require.NotNil(t, pbErr.KeyNotInRegion)
assert.Equal(t, pbErr.KeyNotInRegion.StartKey, []byte{0})
assert.Equal(t, pbErr.KeyNotInRegion.EndKey, []byte{1})
assert.Equal(t, pbErr.KeyNotInRegion.Key, []byte{2})
epochNotMatch := &ErrEpochNotMatch{Regions: []*metapb.Region{region}}
pbErr = RaftstoreErrToPbError(epochNotMatch)
require.NotNil(t, pbErr.EpochNotMatch)
assert.Equal(t, pbErr.EpochNotMatch.CurrentRegions, []*metapb.Region{region})
staleCommand := &ErrStaleCommand{}
pbErr = RaftstoreErrToPbError(staleCommand)
require.NotNil(t, pbErr.StaleCommand)
requestStoreId, actualStoreId := uint64(1), uint64(2)
storeNotMatch := &ErrStoreNotMatch{RequestStoreId: requestStoreId, ActualStoreId: actualStoreId}
pbErr = RaftstoreErrToPbError(storeNotMatch)
require.NotNil(t, pbErr.StoreNotMatch)
assert.Equal(t, pbErr.StoreNotMatch.RequestStoreId, requestStoreId)
assert.Equal(t, pbErr.StoreNotMatch.ActualStoreId, actualStoreId)
}

@@ -0,0 +1,38 @@
package util
import (
"io/ioutil"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
)
func NewTestEngines() *engine_util.Engines {
engines := new(engine_util.Engines)
var err error
engines.KvPath, err = ioutil.TempDir("", "tinykv_kv")
if err != nil {
panic("create kv dir failed")
}
kvOpts := badger.DefaultOptions
kvOpts.Dir = engines.KvPath
kvOpts.ValueDir = engines.KvPath
kvOpts.ValueThreshold = 256
engines.Kv, err = badger.Open(kvOpts)
if err != nil {
panic("open kv db failed")
}
engines.RaftPath, err = ioutil.TempDir("", "tinykv_raft")
if err != nil {
panic("create raft dir failed")
}
raftOpts := badger.DefaultOptions
raftOpts.Dir = engines.RaftPath
raftOpts.ValueDir = engines.RaftPath
raftOpts.ValueThreshold = 256
engines.Raft, err = badger.Open(raftOpts)
if err != nil {
panic("open raft db failed")
}
return engines
}

kv/raftstore/util/util.go Normal file
@@ -0,0 +1,207 @@
package util
import (
"bytes"
"fmt"
"github.com/golang/protobuf/proto"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
"github.com/pingcap/errors"
)
const RaftInvalidIndex uint64 = 0
const InvalidID uint64 = 0
/// `is_initial_msg` checks whether the `msg` can be used to initialize a new peer or not.
// There could be two cases:
// 1. Target peer already exists but has not established communication with leader yet
// 2. Target peer was newly added due to a member change or region split, but has
//    not been created yet.
// In both cases the region's start key and end key are attached to RequestVote and
// Heartbeat messages, so that the store of that peer can check whether to create a new
// peer when receiving them, or just wait for a pending region split to be performed
// later.
func IsInitialMsg(msg *eraftpb.Message) bool {
return msg.MsgType == eraftpb.MessageType_MsgRequestVote ||
// The peer is not yet known to this leader; it may or may not exist.
(msg.MsgType == eraftpb.MessageType_MsgHeartbeat && msg.Commit == RaftInvalidIndex)
}
/// Check whether key is in the region range [`start_key`, `end_key`).
func CheckKeyInRegion(key []byte, region *metapb.Region) error {
if bytes.Compare(key, region.StartKey) >= 0 && (len(region.EndKey) == 0 || bytes.Compare(key, region.EndKey) < 0) {
return nil
} else {
return &ErrKeyNotInRegion{Key: key, Region: region}
}
}
/// Check whether key is in the region range (`start_key`, `end_key`).
func CheckKeyInRegionExclusive(key []byte, region *metapb.Region) error {
if bytes.Compare(region.StartKey, key) < 0 && (len(region.EndKey) == 0 || bytes.Compare(key, region.EndKey) < 0) {
return nil
} else {
return &ErrKeyNotInRegion{Key: key, Region: region}
}
}
/// Check whether key is in the region range [`start_key`, `end_key`].
func CheckKeyInRegionInclusive(key []byte, region *metapb.Region) error {
if bytes.Compare(key, region.StartKey) >= 0 && (len(region.EndKey) == 0 || bytes.Compare(key, region.EndKey) <= 0) {
return nil
} else {
return &ErrKeyNotInRegion{Key: key, Region: region}
}
}
/// IsEpochStale checks whether `epoch` is staler than `checkEpoch`.
func IsEpochStale(epoch *metapb.RegionEpoch, checkEpoch *metapb.RegionEpoch) bool {
return epoch.Version < checkEpoch.Version || epoch.ConfVer < checkEpoch.ConfVer
}
func IsVoteMessage(msg *eraftpb.Message) bool {
tp := msg.GetMsgType()
return tp == eraftpb.MessageType_MsgRequestVote
}
/// `is_first_vote_msg` checks whether `msg` is the first vote message or not. It's used
/// when a message is received but there is no such region in `Store::region_peers` and the
/// region overlaps with others. In this case we should put `msg` into `pending_votes` instead of
/// creating the peer.
func IsFirstVoteMessage(msg *eraftpb.Message) bool {
return IsVoteMessage(msg) && msg.Term == meta.RaftInitLogTerm+1
}
func CheckRegionEpoch(req *raft_cmdpb.RaftCmdRequest, region *metapb.Region, includeRegion bool) error {
checkVer, checkConfVer := false, false
if req.AdminRequest == nil {
checkVer = true
} else {
switch req.AdminRequest.CmdType {
case raft_cmdpb.AdminCmdType_CompactLog, raft_cmdpb.AdminCmdType_InvalidAdmin:
case raft_cmdpb.AdminCmdType_ChangePeer:
checkConfVer = true
case raft_cmdpb.AdminCmdType_Split, raft_cmdpb.AdminCmdType_TransferLeader:
checkVer = true
checkConfVer = true
}
}
if !checkVer && !checkConfVer {
return nil
}
if req.Header == nil {
return fmt.Errorf("missing header!")
}
if req.Header.RegionEpoch == nil {
return fmt.Errorf("missing epoch!")
}
fromEpoch := req.Header.RegionEpoch
currentEpoch := region.RegionEpoch
// We must check epochs strictly to avoid key not in region error.
//
// Consider a 3-node TiKV cluster with merge enabled: after a merge is committed, TiKV A
// replies to TiDB with an epoch-not-match error that contains the latest target Region
// info. TiDB updates its region cache and sends requests to TiKV B, which has not
// applied the commit merge yet. Since the region epoch in the request is higher than
// TiKV B's, the request must be denied due to epoch not match, so it does not read
// from a stale snapshot, thus avoiding the KeyNotInRegion error.
if (checkConfVer && fromEpoch.ConfVer != currentEpoch.ConfVer) ||
(checkVer && fromEpoch.Version != currentEpoch.Version) {
log.Debugf("epoch not match, region id %v, from epoch %v, current epoch %v",
region.Id, fromEpoch, currentEpoch)
regions := []*metapb.Region{}
if includeRegion {
regions = []*metapb.Region{region}
}
return &ErrEpochNotMatch{Message: fmt.Sprintf("current epoch of region %v is %v, but you sent %v",
region.Id, currentEpoch, fromEpoch), Regions: regions}
}
return nil
}
func FindPeer(region *metapb.Region, storeID uint64) *metapb.Peer {
for _, peer := range region.Peers {
if peer.StoreId == storeID {
return peer
}
}
return nil
}
func RemovePeer(region *metapb.Region, storeID uint64) *metapb.Peer {
for i, peer := range region.Peers {
if peer.StoreId == storeID {
region.Peers = append(region.Peers[:i], region.Peers[i+1:]...)
return peer
}
}
return nil
}
func ConfStateFromRegion(region *metapb.Region) (confState eraftpb.ConfState) {
for _, p := range region.Peers {
confState.Nodes = append(confState.Nodes, p.GetId())
}
return
}
func CheckStoreID(req *raft_cmdpb.RaftCmdRequest, storeID uint64) error {
peer := req.Header.Peer
if peer.StoreId == storeID {
return nil
}
return errors.Errorf("store not match %d %d", peer.StoreId, storeID)
}
func CheckTerm(req *raft_cmdpb.RaftCmdRequest, term uint64) error {
header := req.Header
if header.Term == 0 || term <= header.Term+1 {
return nil
}
// If the header's term is 2 or more terms behind the current term,
// leadership may have changed hands.
return &ErrStaleCommand{}
}
func CheckPeerID(req *raft_cmdpb.RaftCmdRequest, peerID uint64) error {
peer := req.Header.Peer
if peer.Id == peerID {
return nil
}
return errors.Errorf("mismatch peer id %d != %d", peer.Id, peerID)
}
func CloneMsg(origin, cloned proto.Message) error {
data, err := proto.Marshal(origin)
if err != nil {
return err
}
return proto.Unmarshal(data, cloned)
}
func SafeCopy(b []byte) []byte {
return append([]byte{}, b...)
}
func PeerEqual(l, r *metapb.Peer) bool {
return l.Id == r.Id && l.StoreId == r.StoreId
}
func RegionEqual(l, r *metapb.Region) bool {
if l == nil || r == nil {
return false
}
return l.Id == r.Id && l.RegionEpoch.Version == r.RegionEpoch.Version && l.RegionEpoch.ConfVer == r.RegionEpoch.ConfVer
}
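`CheckTerm`'s rule (a command may be at most one term behind the node's current term) is compact enough to deserve a concrete trace. A short sketch, assuming this `util` package plus `fmt` and `raft_cmdpb`:

```go
req := &raft_cmdpb.RaftCmdRequest{
    Header: &raft_cmdpb.RaftRequestHeader{Term: 5},
}
fmt.Println(CheckTerm(req, 5)) // <nil>: same term
fmt.Println(CheckTerm(req, 6)) // <nil>: one term behind is still accepted
fmt.Println(CheckTerm(req, 7)) // "stale command": two or more terms behind
```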

@@ -0,0 +1,193 @@
package util
import (
"testing"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
"github.com/stretchr/testify/assert"
)
func TestCheckKeyInRegion(t *testing.T) {
type Case struct {
Key []byte
StartKey []byte
EndKey []byte
IsInRegion bool
Inclusive bool
Exclusive bool
}
test_cases := []Case{
{Key: []byte{}, StartKey: []byte{}, EndKey: []byte{}, IsInRegion: true, Inclusive: true, Exclusive: false},
{Key: []byte{}, StartKey: []byte{}, EndKey: []byte{6}, IsInRegion: true, Inclusive: true, Exclusive: false},
{Key: []byte{}, StartKey: []byte{3}, EndKey: []byte{6}, IsInRegion: false, Inclusive: false, Exclusive: false},
{Key: []byte{4}, StartKey: []byte{3}, EndKey: []byte{6}, IsInRegion: true, Inclusive: true, Exclusive: true},
{Key: []byte{4}, StartKey: []byte{3}, EndKey: []byte{}, IsInRegion: true, Inclusive: true, Exclusive: true},
{Key: []byte{3}, StartKey: []byte{3}, EndKey: []byte{}, IsInRegion: true, Inclusive: true, Exclusive: false},
{Key: []byte{2}, StartKey: []byte{3}, EndKey: []byte{6}, IsInRegion: false, Inclusive: false, Exclusive: false},
{Key: []byte{}, StartKey: []byte{3}, EndKey: []byte{6}, IsInRegion: false, Inclusive: false, Exclusive: false},
{Key: []byte{}, StartKey: []byte{3}, EndKey: []byte{}, IsInRegion: false, Inclusive: false, Exclusive: false},
{Key: []byte{6}, StartKey: []byte{3}, EndKey: []byte{6}, IsInRegion: false, Inclusive: true, Exclusive: false},
}
for _, c := range test_cases {
region := new(metapb.Region)
region.StartKey = c.StartKey
region.EndKey = c.EndKey
result := CheckKeyInRegion(c.Key, region)
assert.Equal(t, result == nil, c.IsInRegion)
result = CheckKeyInRegionInclusive(c.Key, region)
assert.Equal(t, result == nil, c.Inclusive)
result = CheckKeyInRegionExclusive(c.Key, region)
assert.Equal(t, result == nil, c.Exclusive)
}
}
func TestIsInitialMsg(t *testing.T) {
type MsgInfo struct {
MessageType eraftpb.MessageType
Commit uint64
IsInitialMsg bool
}
tbl := []MsgInfo{
{MessageType: eraftpb.MessageType_MsgRequestVote, Commit: RaftInvalidIndex, IsInitialMsg: true},
{MessageType: eraftpb.MessageType_MsgHeartbeat, Commit: RaftInvalidIndex, IsInitialMsg: true},
{MessageType: eraftpb.MessageType_MsgHeartbeat, Commit: 100, IsInitialMsg: false},
{MessageType: eraftpb.MessageType_MsgAppend, Commit: 100, IsInitialMsg: false},
}
for _, m := range tbl {
msg := new(eraftpb.Message)
msg.MsgType = m.MessageType
msg.Commit = m.Commit
assert.Equal(t, IsInitialMsg(msg), m.IsInitialMsg)
}
}
func TestEpochStale(t *testing.T) {
epoch := new(metapb.RegionEpoch)
epoch.Version = 10
epoch.ConfVer = 10
type Ep struct {
Version uint64
ConfVer uint64
IsStale bool
}
tbl := []Ep{
{Version: 11, ConfVer: 10, IsStale: true},
{Version: 10, ConfVer: 11, IsStale: true},
{Version: 10, ConfVer: 10, IsStale: false},
{Version: 10, ConfVer: 9, IsStale: false},
}
for _, e := range tbl {
checkEpoch := new(metapb.RegionEpoch)
checkEpoch.Version = e.Version
checkEpoch.ConfVer = e.ConfVer
assert.Equal(t, IsEpochStale(epoch, checkEpoch), e.IsStale)
}
}
func TestCheckRegionEpoch(t *testing.T) {
epoch := new(metapb.RegionEpoch)
epoch.ConfVer = 2
epoch.Version = 2
region := new(metapb.Region)
region.RegionEpoch = epoch
// Epoch is required for most requests even if it's empty.
emptyReq := new(raft_cmdpb.RaftCmdRequest)
assert.NotNil(t, CheckRegionEpoch(emptyReq, region, false))
// These admin commands do not require epoch.
tys := []raft_cmdpb.AdminCmdType{
raft_cmdpb.AdminCmdType_CompactLog,
raft_cmdpb.AdminCmdType_InvalidAdmin,
}
for _, ty := range tys {
admin := new(raft_cmdpb.AdminRequest)
admin.CmdType = ty
req := new(raft_cmdpb.RaftCmdRequest)
req.AdminRequest = admin
// It is Okay if req does not have region epoch.
assert.Nil(t, CheckRegionEpoch(req, region, false))
req.Header = new(raft_cmdpb.RaftRequestHeader)
req.Header.RegionEpoch = epoch
assert.Nil(t, CheckRegionEpoch(req, region, true))
assert.Nil(t, CheckRegionEpoch(req, region, false))
}
// These admin commands require epoch.version.
tys = []raft_cmdpb.AdminCmdType{
raft_cmdpb.AdminCmdType_Split,
raft_cmdpb.AdminCmdType_TransferLeader,
}
for _, ty := range tys {
admin := new(raft_cmdpb.AdminRequest)
admin.CmdType = ty
req := new(raft_cmdpb.RaftCmdRequest)
req.AdminRequest = admin
// Error if req does not have region epoch.
assert.NotNil(t, CheckRegionEpoch(req, region, false))
staleVersionEpoch := *epoch
staleVersionEpoch.Version = 1
staleRegion := new(metapb.Region)
staleVersionEpochCloned := staleVersionEpoch
staleRegion.RegionEpoch = &staleVersionEpochCloned
staleVersionEpochCloned2 := staleVersionEpoch
req.Header = new(raft_cmdpb.RaftRequestHeader)
req.Header.RegionEpoch = &staleVersionEpochCloned2
assert.Nil(t, CheckRegionEpoch(req, staleRegion, false))
latestVersionEpoch := *epoch
latestVersionEpoch.Version = 3
for _, e := range []metapb.RegionEpoch{staleVersionEpoch, latestVersionEpoch} {
eCloned := e
req.Header.RegionEpoch = &eCloned
assert.NotNil(t, CheckRegionEpoch(req, region, false))
assert.NotNil(t, CheckRegionEpoch(req, region, true))
}
}
// These admin commands require epoch.conf_version.
for _, ty := range []raft_cmdpb.AdminCmdType{
raft_cmdpb.AdminCmdType_Split,
raft_cmdpb.AdminCmdType_ChangePeer,
raft_cmdpb.AdminCmdType_TransferLeader,
} {
admin := new(raft_cmdpb.AdminRequest)
admin.CmdType = ty
req := new(raft_cmdpb.RaftCmdRequest)
req.AdminRequest = admin
req.Header = new(raft_cmdpb.RaftRequestHeader)
// Error if req does not have region epoch.
assert.NotNil(t, CheckRegionEpoch(req, region, false))
staleConfEpoch := cloneEpoch(epoch)
staleConfEpoch.ConfVer = 1
staleRegion := new(metapb.Region)
staleRegion.RegionEpoch = cloneEpoch(staleConfEpoch)
req.Header.RegionEpoch = cloneEpoch(staleConfEpoch)
assert.Nil(t, CheckRegionEpoch(req, staleRegion, false))
latestConfEpoch := cloneEpoch(epoch)
latestConfEpoch.ConfVer = 3
for _, e := range []*metapb.RegionEpoch{staleConfEpoch, latestConfEpoch} {
req.Header.RegionEpoch = cloneEpoch(e)
assert.NotNil(t, CheckRegionEpoch(req, region, false))
assert.NotNil(t, CheckRegionEpoch(req, region, true))
}
}
}
func cloneEpoch(epoch *metapb.RegionEpoch) *metapb.RegionEpoch {
return &metapb.RegionEpoch{
ConfVer: epoch.ConfVer,
Version: epoch.Version,
}
}

kv/server/server.go Normal file
@@ -0,0 +1,110 @@
package server
import (
"context"
"github.com/pingcap-incubator/tinykv/scheduler/pkg/tsoutil"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/pingcap-incubator/tinykv/kv/storage/raft_storage"
"github.com/pingcap-incubator/tinykv/kv/transaction/latches"
"github.com/pingcap-incubator/tinykv/proto/pkg/coprocessor"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/tinykvpb"
)
var _ tinykvpb.TinyKvServer = new(Server)
// Server is a TinyKV server; it 'faces outwards', sending and receiving messages from clients such as TinySQL.
type Server struct {
storage storage.Storage
// used in 4A/4B
Latches *latches.Latches
}
func NewServer(storage storage.Storage) *Server {
return &Server{
storage: storage,
Latches: latches.NewLatches(),
}
}
// The below functions are Server's gRPC API (implements TinyKvServer).
// Raw API.
func (server *Server) RawGet(_ context.Context, req *kvrpcpb.RawGetRequest) (*kvrpcpb.RawGetResponse, error) {
// Your code here (1).
return nil, nil
}
func (server *Server) RawPut(_ context.Context, req *kvrpcpb.RawPutRequest) (*kvrpcpb.RawPutResponse, error) {
// Your code here (1).
return nil, nil
}
func (server *Server) RawDelete(_ context.Context, req *kvrpcpb.RawDeleteRequest) (*kvrpcpb.RawDeleteResponse, error) {
// Your code here (1).
return nil, nil
}
func (server *Server) RawScan(_ context.Context, req *kvrpcpb.RawScanRequest) (*kvrpcpb.RawScanResponse, error) {
// Your code here (1).
return nil, nil
}
// Raft commands (tinykv <-> tinykv)
// Only used for RaftStorage, so trivially forward it.
func (server *Server) Raft(stream tinykvpb.TinyKv_RaftServer) error {
return server.storage.(*raft_storage.RaftStorage).Raft(stream)
}
// Snapshot stream (tinykv <-> tinykv)
// Only used for RaftStorage, so trivially forward it.
func (server *Server) Snapshot(stream tinykvpb.TinyKv_SnapshotServer) error {
return server.storage.(*raft_storage.RaftStorage).Snapshot(stream)
}
// Transactional API.
func (server *Server) KvGet(_ context.Context, req *kvrpcpb.GetRequest) (*kvrpcpb.GetResponse, error) {
// Your code here (4B).
return nil, nil
}
func (server *Server) KvPrewrite(_ context.Context, req *kvrpcpb.PrewriteRequest) (*kvrpcpb.PrewriteResponse, error) {
// Your code here (4B).
return nil, nil
}
func (server *Server) KvCommit(_ context.Context, req *kvrpcpb.CommitRequest) (*kvrpcpb.CommitResponse, error) {
// Your code here (4B).
return nil, nil
}
func (server *Server) KvScan(_ context.Context, req *kvrpcpb.ScanRequest) (*kvrpcpb.ScanResponse, error) {
// Your code here (4C).
return nil, nil
}
func (server *Server) KvCheckTxnStatus(_ context.Context, req *kvrpcpb.CheckTxnStatusRequest) (*kvrpcpb.CheckTxnStatusResponse, error) {
// Your code here (4C).
return nil, nil
}
func (server *Server) KvBatchRollback(_ context.Context, req *kvrpcpb.BatchRollbackRequest) (*kvrpcpb.BatchRollbackResponse, error) {
// Your code here (4C).
return nil, nil
}
func (server *Server) KvResolveLock(_ context.Context, req *kvrpcpb.ResolveLockRequest) (*kvrpcpb.ResolveLockResponse, error) {
// Your code here (4C).
return nil, nil
}
// SQL push down commands.
func (server *Server) Coprocessor(_ context.Context, req *coprocessor.Request) (*coprocessor.Response, error) {
return &coprocessor.Response{}, nil
}
// PhysicalTime returns the physical time part of the timestamp.
func PhysicalTime(ts uint64) uint64 {
return ts >> tsoutil.PhysicalShiftBits
}
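`PhysicalTime` only makes sense once you know how a TSO timestamp is packed: the physical clock in milliseconds occupies the high bits and a logical counter the low bits. A self-contained sketch, assuming the conventional 18-bit logical part (i.e. `tsoutil.PhysicalShiftBits == 18`; illustration only):

```go
package main

import "fmt"

const physicalShiftBits = 18 // assumed value of tsoutil.PhysicalShiftBits

func main() {
    physical := uint64(1_600_000_000_000) // milliseconds since the Unix epoch
    logical := uint64(42)                 // per-millisecond counter

    ts := physical<<physicalShiftBits | logical
    fmt.Println(ts >> physicalShiftBits)         // 1600000000000, what PhysicalTime(ts) returns
    fmt.Println(ts & (1<<physicalShiftBits - 1)) // 42, the logical part
}
```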

kv/server/server_test.go Normal file
@@ -0,0 +1,326 @@
package server
import (
"os"
"testing"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/pingcap-incubator/tinykv/kv/storage/standalone_storage"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
"github.com/stretchr/testify/assert"
)
func Set(s *standalone_storage.StandAloneStorage, cf string, key []byte, value []byte) error {
return s.Write(nil, []storage.Modify{
{
Data: storage.Put{
Cf: cf,
Key: key,
Value: value,
},
},
})
}
func Get(s *standalone_storage.StandAloneStorage, cf string, key []byte) ([]byte, error) {
reader, err := s.Reader(nil)
if err != nil {
return nil, err
}
return reader.GetCF(cf, key)
}
func Iter(s *standalone_storage.StandAloneStorage, cf string) (engine_util.DBIterator, error) {
reader, err := s.Reader(nil)
if err != nil {
return nil, err
}
return reader.IterCF(cf), nil
}
func cleanUpTestData(conf *config.Config) error {
if conf != nil {
return os.RemoveAll(conf.DBPath)
}
return nil
}
func TestRawGet1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
Set(s, cf, []byte{99}, []byte{42})
req := &kvrpcpb.RawGetRequest{
Key: []byte{99},
Cf: cf,
}
resp, err := server.RawGet(nil, req)
assert.Nil(t, err)
assert.Equal(t, []byte{42}, resp.Value)
}
func TestRawGetNotFound1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
req := &kvrpcpb.RawGetRequest{
Key: []byte{99},
Cf: cf,
}
resp, err := server.RawGet(nil, req)
assert.Nil(t, err)
assert.True(t, resp.NotFound)
}
func TestRawPut1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
req := &kvrpcpb.RawPutRequest{
Key: []byte{99},
Value: []byte{42},
Cf: cf,
}
_, err := server.RawPut(nil, req)
got, err := Get(s, cf, []byte{99})
assert.Nil(t, err)
assert.Equal(t, []byte{42}, got)
}
func TestRawGetAfterRawPut1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
put1 := &kvrpcpb.RawPutRequest{
Key: []byte{99},
Value: []byte{42},
Cf: engine_util.CfDefault,
}
_, err := server.RawPut(nil, put1)
assert.Nil(t, err)
put2 := &kvrpcpb.RawPutRequest{
Key: []byte{99},
Value: []byte{44},
Cf: engine_util.CfWrite,
}
_, err = server.RawPut(nil, put2)
assert.Nil(t, err)
get1 := &kvrpcpb.RawGetRequest{
Key: []byte{99},
Cf: engine_util.CfDefault,
}
resp, err := server.RawGet(nil, get1)
assert.Nil(t, err)
assert.Equal(t, []byte{42}, resp.Value)
get2 := &kvrpcpb.RawGetRequest{
Key: []byte{99},
Cf: engine_util.CfWrite,
}
resp, err = server.RawGet(nil, get2)
assert.Nil(t, err)
assert.Equal(t, []byte{44}, resp.Value)
}
func TestRawGetAfterRawDelete1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
assert.Nil(t, Set(s, cf, []byte{99}, []byte{42}))
delete := &kvrpcpb.RawDeleteRequest{
Key: []byte{99},
Cf: cf,
}
get := &kvrpcpb.RawGetRequest{
Key: []byte{99},
Cf: cf,
}
_, err := server.RawDelete(nil, delete)
assert.Nil(t, err)
resp, err := server.RawGet(nil, get)
assert.Nil(t, err)
assert.True(t, resp.NotFound)
}
func TestRawDelete1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
req := &kvrpcpb.RawDeleteRequest{
Key: []byte{99},
Cf: cf,
}
_, err := server.RawDelete(nil, req)
assert.Nil(t, err)
_, err = Get(s, cf, []byte{99})
assert.Equal(t, err, badger.ErrKeyNotFound)
}
func TestRawScan1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
Set(s, cf, []byte{1}, []byte{233, 1})
Set(s, cf, []byte{2}, []byte{233, 2})
Set(s, cf, []byte{3}, []byte{233, 3})
Set(s, cf, []byte{4}, []byte{233, 4})
Set(s, cf, []byte{5}, []byte{233, 5})
req := &kvrpcpb.RawScanRequest{
StartKey: []byte{1},
Limit: 3,
Cf: cf,
}
resp, err := server.RawScan(nil, req)
assert.Nil(t, err)
assert.Equal(t, 3, len(resp.Kvs))
expectedKeys := [][]byte{{1}, {2}, {3}}
for i, kv := range resp.Kvs {
assert.Equal(t, expectedKeys[i], kv.Key)
assert.Equal(t, append([]byte{233}, expectedKeys[i]...), kv.Value)
}
}
func TestRawScanAfterRawPut1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
assert.Nil(t, Set(s, cf, []byte{1}, []byte{233, 1}))
assert.Nil(t, Set(s, cf, []byte{2}, []byte{233, 2}))
assert.Nil(t, Set(s, cf, []byte{3}, []byte{233, 3}))
assert.Nil(t, Set(s, cf, []byte{4}, []byte{233, 4}))
put := &kvrpcpb.RawPutRequest{
Key: []byte{5},
Value: []byte{233, 5},
Cf: cf,
}
scan := &kvrpcpb.RawScanRequest{
StartKey: []byte{1},
Limit: 10,
Cf: cf,
}
expectedKeys := [][]byte{{1}, {2}, {3}, {4}, {5}}
_, err := server.RawPut(nil, put)
assert.Nil(t, err)
resp, err := server.RawScan(nil, scan)
assert.Nil(t, err)
assert.Equal(t, len(resp.Kvs), len(expectedKeys))
for i, kv := range resp.Kvs {
assert.Equal(t, expectedKeys[i], kv.Key)
assert.Equal(t, append([]byte{233}, expectedKeys[i]...), kv.Value)
}
}
func TestRawScanAfterRawDelete1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
assert.Nil(t, Set(s, cf, []byte{1}, []byte{233, 1}))
assert.Nil(t, Set(s, cf, []byte{2}, []byte{233, 2}))
assert.Nil(t, Set(s, cf, []byte{3}, []byte{233, 3}))
assert.Nil(t, Set(s, cf, []byte{4}, []byte{233, 4}))
delete := &kvrpcpb.RawDeleteRequest{
Key: []byte{3},
Cf: cf,
}
scan := &kvrpcpb.RawScanRequest{
StartKey: []byte{1},
Limit: 10,
Cf: cf,
}
expectedKeys := [][]byte{{1}, {2}, {4}}
_, err := server.RawDelete(nil, delete)
assert.Nil(t, err)
resp, err := server.RawScan(nil, scan)
assert.Nil(t, err)
assert.Equal(t, len(resp.Kvs), len(expectedKeys))
for i, kv := range resp.Kvs {
assert.Equal(t, expectedKeys[i], kv.Key)
assert.Equal(t, append([]byte{233}, expectedKeys[i]...), kv.Value)
}
}
func TestIterWithRawDelete1(t *testing.T) {
conf := config.NewTestConfig()
s := standalone_storage.NewStandAloneStorage(conf)
server := NewServer(s)
defer cleanUpTestData(conf)
cf := engine_util.CfDefault
assert.Nil(t, Set(s, cf, []byte{1}, []byte{233, 1}))
assert.Nil(t, Set(s, cf, []byte{2}, []byte{233, 2}))
assert.Nil(t, Set(s, cf, []byte{3}, []byte{233, 3}))
assert.Nil(t, Set(s, cf, []byte{4}, []byte{233, 4}))
it, err := Iter(s, cf)
assert.Nil(t, err)
delete := &kvrpcpb.RawDeleteRequest{
Key: []byte{3},
Cf: cf,
}
_, err = server.RawDelete(nil, delete)
assert.Nil(t, err)
expectedKeys := [][]byte{{1}, {2}, {3}, {4}}
i := 0
for it.Seek([]byte{1}); it.Valid(); it.Next() {
item := it.Item()
key := item.Key()
assert.Equal(t, expectedKeys[i], key)
i++
}
}

kv/storage/mem_storage.go Normal file
@@ -0,0 +1,242 @@
package storage
import (
"bytes"
"fmt"
"github.com/Connor1996/badger/y"
"github.com/petar/GoLLRB/llrb"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
)
// MemStorage is an in-memory storage engine intended for testing only. Data is neither written to disk
// nor sent to other nodes.
type MemStorage struct {
CfDefault *llrb.LLRB
CfLock *llrb.LLRB
CfWrite *llrb.LLRB
}
func NewMemStorage() *MemStorage {
return &MemStorage{
CfDefault: llrb.New(),
CfLock: llrb.New(),
CfWrite: llrb.New(),
}
}
func (s *MemStorage) Start() error {
return nil
}
func (s *MemStorage) Stop() error {
return nil
}
func (s *MemStorage) Reader(ctx *kvrpcpb.Context) (StorageReader, error) {
return &memReader{s}, nil
}
func (s *MemStorage) Write(ctx *kvrpcpb.Context, batch []Modify) error {
for _, m := range batch {
switch data := m.Data.(type) {
case Put:
item := memItem{data.Key, data.Value, false}
switch data.Cf {
case engine_util.CfDefault:
s.CfDefault.ReplaceOrInsert(item)
case engine_util.CfLock:
s.CfLock.ReplaceOrInsert(item)
case engine_util.CfWrite:
s.CfWrite.ReplaceOrInsert(item)
}
case Delete:
item := memItem{key: data.Key}
switch data.Cf {
case engine_util.CfDefault:
s.CfDefault.Delete(item)
case engine_util.CfLock:
s.CfLock.Delete(item)
case engine_util.CfWrite:
s.CfWrite.Delete(item)
}
}
}
return nil
}
func (s *MemStorage) Get(cf string, key []byte) []byte {
item := memItem{key: key}
var result llrb.Item
switch cf {
case engine_util.CfDefault:
result = s.CfDefault.Get(item)
case engine_util.CfLock:
result = s.CfLock.Get(item)
case engine_util.CfWrite:
result = s.CfWrite.Get(item)
}
if result == nil {
return nil
}
return result.(memItem).value
}
func (s *MemStorage) Set(cf string, key []byte, value []byte) {
item := memItem{key, value, true}
switch cf {
case engine_util.CfDefault:
s.CfDefault.ReplaceOrInsert(item)
case engine_util.CfLock:
s.CfLock.ReplaceOrInsert(item)
case engine_util.CfWrite:
s.CfWrite.ReplaceOrInsert(item)
}
}
func (s *MemStorage) HasChanged(cf string, key []byte) bool {
item := memItem{key: key}
var result llrb.Item
switch cf {
case engine_util.CfDefault:
result = s.CfDefault.Get(item)
case engine_util.CfLock:
result = s.CfLock.Get(item)
case engine_util.CfWrite:
result = s.CfWrite.Get(item)
}
if result == nil {
return true
}
return !result.(memItem).fresh
}
func (s *MemStorage) Len(cf string) int {
switch cf {
case engine_util.CfDefault:
return s.CfDefault.Len()
case engine_util.CfLock:
return s.CfLock.Len()
case engine_util.CfWrite:
return s.CfWrite.Len()
}
return -1
}
// memReader is a StorageReader which reads from a MemStorage.
type memReader struct {
inner *MemStorage
}
func (mr *memReader) GetCF(cf string, key []byte) ([]byte, error) {
item := memItem{key: key}
var result llrb.Item
switch cf {
case engine_util.CfDefault:
result = mr.inner.CfDefault.Get(item)
case engine_util.CfLock:
result = mr.inner.CfLock.Get(item)
case engine_util.CfWrite:
result = mr.inner.CfWrite.Get(item)
default:
return nil, fmt.Errorf("mem-server: bad CF %s", cf)
}
if result == nil {
return nil, nil
}
return result.(memItem).value, nil
}
func (mr *memReader) IterCF(cf string) engine_util.DBIterator {
var data *llrb.LLRB
switch cf {
case engine_util.CfDefault:
data = mr.inner.CfDefault
case engine_util.CfLock:
data = mr.inner.CfLock
case engine_util.CfWrite:
data = mr.inner.CfWrite
default:
return nil
}
min := data.Min()
if min == nil {
return &memIter{data, memItem{}}
}
return &memIter{data, min.(memItem)}
}
func (r *memReader) Close() {}
type memIter struct {
data *llrb.LLRB
item memItem
}
func (it *memIter) Item() engine_util.DBItem {
return it.item
}
func (it *memIter) Valid() bool {
return it.item.key != nil
}
func (it *memIter) Next() {
first := true
oldItem := it.item
it.item = memItem{}
it.data.AscendGreaterOrEqual(oldItem, func(item llrb.Item) bool {
// Skip the first item, which is oldItem (the element the iterator currently points at).
if first {
first = false
return true
}
it.item = item.(memItem)
return false
})
}
func (it *memIter) Seek(key []byte) {
it.item = memItem{}
it.data.AscendGreaterOrEqual(memItem{key: key}, func(item llrb.Item) bool {
it.item = item.(memItem)
return false
})
}
func (it *memIter) Close() {}
type memItem struct {
key []byte
value []byte
fresh bool
}
func (it memItem) Key() []byte {
return it.key
}
func (it memItem) KeyCopy(dst []byte) []byte {
return y.SafeCopy(dst, it.key)
}
func (it memItem) Value() ([]byte, error) {
return it.value, nil
}
func (it memItem) ValueSize() int {
return len(it.value)
}
func (it memItem) ValueCopy(dst []byte) ([]byte, error) {
return y.SafeCopy(dst, it.value), nil
}
func (it memItem) Less(than llrb.Item) bool {
other := than.(memItem)
return bytes.Compare(it.key, other.key) < 0
}
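`MemStorage` implements the same `Storage`/`StorageReader` interfaces as the disk-backed engines, which is what makes it a drop-in stand-in for unit tests. A short usage sketch, assuming this `storage` package plus `fmt` and `engine_util` (illustration only):

```go
s := NewMemStorage()
_ = s.Write(nil, []Modify{
    {Data: Put{Cf: engine_util.CfDefault, Key: []byte("k"), Value: []byte("v")}},
})

reader, _ := s.Reader(nil)
defer reader.Close()

val, _ := reader.GetCF(engine_util.CfDefault, []byte("k"))
fmt.Printf("%s\n", val) // v

// Iteration is ordered by key, courtesy of the LLRB trees.
it := reader.IterCF(engine_util.CfDefault)
defer it.Close()
for ; it.Valid(); it.Next() {
    fmt.Printf("%s\n", it.Item().Key()) // k
}
```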

kv/storage/modify.go Normal file
@@ -0,0 +1,37 @@
package storage
// Modify is a single modification to TinyKV's underlying storage.
type Modify struct {
Data interface{}
}
type Put struct {
Key []byte
Value []byte
Cf string
}
type Delete struct {
Key []byte
Cf string
}
func (m *Modify) Key() []byte {
switch data := m.Data.(type) {
case Put:
return data.Key
case Delete:
return data.Key
}
return nil
}
func (m *Modify) Cf() string {
switch data := m.Data.(type) {
case Put:
return data.Cf
case Delete:
return data.Cf
}
return ""
}
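A write batch in TinyKV is just a slice of `Modify` values; `Key()` and `Cf()` let callers treat puts and deletes uniformly. A short sketch, assuming this `storage` package plus `fmt` and the `engine_util` CF constants:

```go
batch := []Modify{
    {Data: Put{Cf: engine_util.CfDefault, Key: []byte("a"), Value: []byte("1")}},
    {Data: Delete{Cf: engine_util.CfWrite, Key: []byte("b")}},
}
for i := range batch {
    m := &batch[i]
    fmt.Printf("cf=%s key=%s\n", m.Cf(), m.Key())
}
```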

@@ -0,0 +1,133 @@
package raft_storage
import (
"context"
"sync"
"time"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/tinykvpb"
"google.golang.org/grpc"
"google.golang.org/grpc/keepalive"
)
type raftConn struct {
streamMu sync.Mutex
stream tinykvpb.TinyKv_RaftClient
ctx context.Context
cancel context.CancelFunc
}
func newRaftConn(addr string, cfg *config.Config) (*raftConn, error) {
cc, err := grpc.Dial(addr, grpc.WithInsecure(),
grpc.WithInitialWindowSize(2*1024*1024),
grpc.WithKeepaliveParams(keepalive.ClientParameters{
Time: 3 * time.Second,
Timeout: 60 * time.Second,
PermitWithoutStream: true,
}))
if err != nil {
return nil, err
}
ctx, cancel := context.WithCancel(context.Background())
stream, err := tinykvpb.NewTinyKvClient(cc).Raft(ctx)
if err != nil {
cancel()
return nil, err
}
return &raftConn{
stream: stream,
ctx: ctx,
cancel: cancel,
}, nil
}
func (c *raftConn) Stop() {
c.cancel()
}
func (c *raftConn) Send(msg *raft_serverpb.RaftMessage) error {
c.streamMu.Lock()
defer c.streamMu.Unlock()
return c.stream.Send(msg)
}
type connKey struct {
addr string
index int
}
type RaftClient struct {
config *config.Config
sync.RWMutex
conn *raftConn
addrs map[uint64]string
}
func newRaftClient(config *config.Config) *RaftClient {
return &RaftClient{
config: config,
addrs: make(map[uint64]string),
}
}
func (c *RaftClient) getConn(addr string, regionID uint64) (*raftConn, error) {
c.RLock()
if c.conn != nil {
c.RUnlock()
return c.conn, nil
}
c.RUnlock()
newConn, err := newRaftConn(addr, c.config)
if err != nil {
return nil, err
}
c.Lock()
defer c.Unlock()
if c.conn != nil {
newConn.Stop()
return c.conn, nil
}
c.conn = newConn
return newConn, nil
}
func (c *RaftClient) Send(storeID uint64, addr string, msg *raft_serverpb.RaftMessage) error {
conn, err := c.getConn(addr, msg.GetRegionId())
if err != nil {
return err
}
err = conn.Send(msg)
if err == nil {
return nil
}
log.Error("raft client failed to send")
c.Lock()
defer c.Unlock()
conn.Stop()
c.conn = nil
if oldAddr, ok := c.addrs[storeID]; ok && oldAddr == addr {
delete(c.addrs, storeID)
}
return err
}
func (c *RaftClient) GetAddr(storeID uint64) string {
c.RLock()
defer c.RUnlock()
return c.addrs[storeID]
}
func (c *RaftClient) InsertAddr(storeID uint64, addr string) {
c.Lock()
defer c.Unlock()
c.addrs[storeID] = addr
}
func (c *RaftClient) Flush() {
// BufferHint is not supported.
}
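The `RaftClient` keeps a single shared stream: `getConn` lazily dials it under a double-checked lock, and a failed `Send` drops both the connection and the cached address so the next call re-resolves and re-dials. A usage sketch under stated assumptions (`cfg` is a `*config.Config`, `storeID` a store id, `msg` a `*raft_serverpb.RaftMessage`; illustration only):

```go
client := newRaftClient(cfg)
client.InsertAddr(storeID, "127.0.0.1:20160") // e.g. an address resolved via the scheduler

if err := client.Send(storeID, client.GetAddr(storeID), msg); err != nil {
    // The cached connection was discarded and the stale address mapping removed,
    // so a later Send will dial again once a fresh address is inserted.
    log.Errorf("send to store %d failed: %v", storeID, err)
}
```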

@@ -0,0 +1,222 @@
package raft_storage
import (
"context"
"os"
"path/filepath"
"strings"
"sync"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/scheduler_client"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/proto/pkg/errorpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/tinykvpb"
"github.com/pingcap/errors"
)
// RaftStorage is an implementation of `Storage` (see tikv/server.go) backed by a Raft node. It is part of a Raft network.
// By using Raft, reads and writes are consistent with other nodes in the TinyKV instance.
type RaftStorage struct {
engines *engine_util.Engines
config *config.Config
node *raftstore.Node
snapManager *snap.SnapManager
raftRouter *raftstore.RaftstoreRouter
batchSystem *raftstore.RaftBatchSystem
resolveWorker *worker.Worker
snapWorker *worker.Worker
wg sync.WaitGroup
}
type RegionError struct {
RequestErr *errorpb.Error
}
func (re *RegionError) Error() string {
return re.RequestErr.String()
}
func (rs *RaftStorage) checkResponse(resp *raft_cmdpb.RaftCmdResponse, reqCount int) error {
if resp.Header.Error != nil {
return &RegionError{RequestErr: resp.Header.Error}
}
if len(resp.Responses) != reqCount {
return errors.Errorf("responses count %d is not equal to requests count %d",
len(resp.Responses), reqCount)
}
return nil
}
// NewRaftStorage creates a new storage engine backed by a raftstore.
func NewRaftStorage(conf *config.Config) *RaftStorage {
dbPath := conf.DBPath
kvPath := filepath.Join(dbPath, "kv")
raftPath := filepath.Join(dbPath, "raft")
snapPath := filepath.Join(dbPath, "snap")
os.MkdirAll(kvPath, os.ModePerm)
os.MkdirAll(raftPath, os.ModePerm)
os.Mkdir(snapPath, os.ModePerm)
raftDB := engine_util.CreateDB("raft", conf)
kvDB := engine_util.CreateDB("kv", conf)
engines := engine_util.NewEngines(kvDB, raftDB, kvPath, raftPath)
return &RaftStorage{engines: engines, config: conf}
}
func (rs *RaftStorage) Write(ctx *kvrpcpb.Context, batch []storage.Modify) error {
var reqs []*raft_cmdpb.Request
for _, m := range batch {
switch m.Data.(type) {
case storage.Put:
put := m.Data.(storage.Put)
reqs = append(reqs, &raft_cmdpb.Request{
CmdType: raft_cmdpb.CmdType_Put,
Put: &raft_cmdpb.PutRequest{
Cf: put.Cf,
Key: put.Key,
Value: put.Value,
}})
case storage.Delete:
delete := m.Data.(storage.Delete)
reqs = append(reqs, &raft_cmdpb.Request{
CmdType: raft_cmdpb.CmdType_Delete,
Delete: &raft_cmdpb.DeleteRequest{
Cf: delete.Cf,
Key: delete.Key,
}})
}
}
header := &raft_cmdpb.RaftRequestHeader{
RegionId: ctx.RegionId,
Peer: ctx.Peer,
RegionEpoch: ctx.RegionEpoch,
Term: ctx.Term,
}
request := &raft_cmdpb.RaftCmdRequest{
Header: header,
Requests: reqs,
}
cb := message.NewCallback()
if err := rs.raftRouter.SendRaftCommand(request, cb); err != nil {
return err
}
return rs.checkResponse(cb.WaitResp(), len(reqs))
}
func (rs *RaftStorage) Reader(ctx *kvrpcpb.Context) (storage.StorageReader, error) {
header := &raft_cmdpb.RaftRequestHeader{
RegionId: ctx.RegionId,
Peer: ctx.Peer,
RegionEpoch: ctx.RegionEpoch,
Term: ctx.Term,
}
request := &raft_cmdpb.RaftCmdRequest{
Header: header,
Requests: []*raft_cmdpb.Request{{
CmdType: raft_cmdpb.CmdType_Snap,
Snap: &raft_cmdpb.SnapRequest{},
}},
}
cb := message.NewCallback()
if err := rs.raftRouter.SendRaftCommand(request, cb); err != nil {
return nil, err
}
resp := cb.WaitResp()
if err := rs.checkResponse(resp, 1); err != nil {
if cb.Txn != nil {
cb.Txn.Discard()
}
return nil, err
}
if cb.Txn == nil {
panic("can not found region snap")
}
if len(resp.Responses) != 1 {
panic("wrong response count for snap cmd")
}
return NewRegionReader(cb.Txn, *resp.Responses[0].GetSnap().Region), nil
}
func (rs *RaftStorage) Raft(stream tinykvpb.TinyKv_RaftServer) error {
for {
msg, err := stream.Recv()
if err != nil {
return err
}
rs.raftRouter.SendRaftMessage(msg)
}
}
func (rs *RaftStorage) Snapshot(stream tinykvpb.TinyKv_SnapshotServer) error {
var err error
done := make(chan struct{})
rs.snapWorker.Sender() <- &recvSnapTask{
stream: stream,
callback: func(e error) {
err = e
close(done)
},
}
<-done
return err
}
func (rs *RaftStorage) Start() error {
cfg := rs.config
schedulerClient, err := scheduler_client.NewClient(strings.Split(cfg.SchedulerAddr, ","), "")
if err != nil {
return err
}
rs.raftRouter, rs.batchSystem = raftstore.CreateRaftBatchSystem(cfg)
rs.resolveWorker = worker.NewWorker("resolver", &rs.wg)
resolveSender := rs.resolveWorker.Sender()
resolveRunner := newResolverRunner(schedulerClient)
rs.resolveWorker.Start(resolveRunner)
rs.snapManager = snap.NewSnapManager(filepath.Join(cfg.DBPath, "snap"))
rs.snapWorker = worker.NewWorker("snap-worker", &rs.wg)
snapSender := rs.snapWorker.Sender()
snapRunner := newSnapRunner(rs.snapManager, rs.config, rs.raftRouter)
rs.snapWorker.Start(snapRunner)
raftClient := newRaftClient(cfg)
trans := NewServerTransport(raftClient, snapSender, rs.raftRouter, resolveSender)
rs.node = raftstore.NewNode(rs.batchSystem, rs.config, schedulerClient)
err = rs.node.Start(context.TODO(), rs.engines, trans, rs.snapManager)
if err != nil {
return err
}
return nil
}
func (rs *RaftStorage) Stop() error {
rs.snapWorker.Stop()
rs.node.Stop()
rs.resolveWorker.Stop()
rs.wg.Wait()
if err := rs.engines.Raft.Close(); err != nil {
return err
}
if err := rs.engines.Kv.Close(); err != nil {
return err
}
return nil
}
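Putting the pieces together: a `RaftStorage` is built from a config, started (which spins up the resolver and snap workers, the raft client/transport and the raftstore node), and then serves `Write`/`Reader` calls whose `kvrpcpb.Context` carries the region routing information. A lifecycle sketch under stated assumptions (`conf` is a `*config.Config` with `SchedulerAddr` set, `reqCtx` a populated `*kvrpcpb.Context`; illustration only):

```go
rs := NewRaftStorage(conf)
if err := rs.Start(); err != nil {
    panic(err)
}
defer rs.Stop()

// A write is packed into a RaftCmdRequest and proposed through Raft.
err := rs.Write(reqCtx, []storage.Modify{
    {Data: storage.Put{Cf: engine_util.CfDefault, Key: []byte("k"), Value: []byte("v")}},
})
if err != nil {
    // Region-level failures (not leader, epoch not match, ...) surface as *RegionError.
    panic(err)
}
```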

@@ -0,0 +1,86 @@
package raft_storage
import (
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
)
type RegionReader struct {
txn *badger.Txn
region *metapb.Region
}
func NewRegionReader(txn *badger.Txn, region metapb.Region) *RegionReader {
return &RegionReader{
txn: txn,
region: &region,
}
}
func (r *RegionReader) GetCF(cf string, key []byte) ([]byte, error) {
if err := util.CheckKeyInRegion(key, r.region); err != nil {
return nil, err
}
return engine_util.GetCFFromTxn(r.txn, cf, key)
}
func (r *RegionReader) IterCF(cf string) engine_util.DBIterator {
return NewRegionIterator(engine_util.NewCFIterator(cf, r.txn), r.region)
}
func (r *RegionReader) Close() {
r.txn.Discard()
}
// RegionIterator wraps a DB iterator and only allows it to iterate within the region. It behaves as if the
// underlying DB contained only that one region.
type RegionIterator struct {
iter *engine_util.BadgerIterator
region *metapb.Region
}
func NewRegionIterator(iter *engine_util.BadgerIterator, region *metapb.Region) *RegionIterator {
return &RegionIterator{
iter: iter,
region: region,
}
}
func (it *RegionIterator) Item() engine_util.DBItem {
return it.iter.Item()
}
func (it *RegionIterator) Valid() bool {
if !it.iter.Valid() || engine_util.ExceedEndKey(it.iter.Item().Key(), it.region.EndKey) {
return false
}
return true
}
func (it *RegionIterator) ValidForPrefix(prefix []byte) bool {
if !it.iter.ValidForPrefix(prefix) || engine_util.ExceedEndKey(it.iter.Item().Key(), it.region.EndKey) {
return false
}
return true
}
func (it *RegionIterator) Close() {
it.iter.Close()
}
func (it *RegionIterator) Next() {
it.iter.Next()
}
func (it *RegionIterator) Seek(key []byte) {
if err := util.CheckKeyInRegion(key, it.region); err != nil {
panic(err)
}
it.iter.Seek(key)
}
func (it *RegionIterator) Rewind() {
it.iter.Rewind()
}
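
For orientation, here is a minimal usage sketch (a hypothetical helper, not part of the framework), assuming the badger.Txn and metapb.Region come from a Snap response as in RaftStorage.Reader above. It collects every value of the default column family inside one region, relying on RegionIterator.Valid to stop at the region's end key.

```go
package raft_storage

import (
    "github.com/Connor1996/badger"
    "github.com/pingcap-incubator/tinykv/kv/util/engine_util"
    "github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
)

// collectRegionValues is a hypothetical helper: it reads all values of the default CF
// within a single region through the region-scoped reader.
func collectRegionValues(txn *badger.Txn, region metapb.Region) ([][]byte, error) {
    reader := NewRegionReader(txn, region)
    defer reader.Close() // discards the underlying transaction

    iter := reader.IterCF(engine_util.CfDefault)
    defer iter.Close()

    var values [][]byte
    // Valid() returns false once the iterator passes the region's end key.
    for iter.Seek(region.StartKey); iter.Valid(); iter.Next() {
        value, err := iter.Item().ValueCopy(nil)
        if err != nil {
            return nil, err
        }
        values = append(values, value)
    }
    return values, nil
}
```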

View File

@ -0,0 +1,67 @@
package raft_storage
import (
"context"
"time"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/kv/raftstore/scheduler_client"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap/errors"
)
// Handle will resolve t's storeID into the address of the TinyKV node which should handle t. t's callback is then
// called with that address.
func (r *resolverRunner) Handle(t worker.Task) {
data := t.(*resolveAddrTask)
data.callback(r.getAddr(data.storeID))
}
const storeAddressRefreshSeconds = 60
type storeAddr struct {
addr string
lastUpdate time.Time
}
type resolverRunner struct {
schedulerClient scheduler_client.Client
storeAddrs map[uint64]storeAddr
}
type resolveAddrTask struct {
storeID uint64
callback func(addr string, err error)
}
func newResolverRunner(schedulerClient scheduler_client.Client) *resolverRunner {
return &resolverRunner{
schedulerClient: schedulerClient,
storeAddrs: make(map[uint64]storeAddr),
}
}
func (r *resolverRunner) getAddr(id uint64) (string, error) {
if sa, ok := r.storeAddrs[id]; ok {
if time.Since(sa.lastUpdate).Seconds() < storeAddressRefreshSeconds {
return sa.addr, nil
}
}
store, err := r.schedulerClient.GetStore(context.TODO(), id)
if err != nil {
return "", err
}
if store.GetState() == metapb.StoreState_Tombstone {
return "", errors.Errorf("store %d has been removed", id)
}
addr := store.GetAddress()
if addr == "" {
return "", errors.Errorf("invalid empty address for store %d", id)
}
r.storeAddrs[id] = storeAddr{
addr: addr,
lastUpdate: time.Now(),
}
return addr, nil
}
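
A hypothetical sketch (not part of the framework) of how a caller could turn the asynchronous resolveAddrTask callback into a blocking lookup, assuming a resolver worker has been started as in RaftStorage.Start and its sender channel is at hand:

```go
package raft_storage

import "github.com/pingcap-incubator/tinykv/kv/util/worker"

// resolveStoreAddr is a hypothetical helper: it posts a resolveAddrTask to the resolver
// worker and blocks until the callback delivers the resolved address (or an error).
func resolveStoreAddr(resolveSender chan<- worker.Task, storeID uint64) (string, error) {
    type result struct {
        addr string
        err  error
    }
    ch := make(chan result, 1)
    resolveSender <- &resolveAddrTask{
        storeID: storeID,
        callback: func(addr string, err error) {
            ch <- result{addr: addr, err: err}
        },
    }
    res := <-ch
    return res.addr, res.err
}
```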

View File

@ -0,0 +1,182 @@
package raft_storage
import (
"bytes"
"context"
"io"
"time"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/tinykvpb"
"github.com/pingcap/errors"
"google.golang.org/grpc"
"google.golang.org/grpc/keepalive"
)
type sendSnapTask struct {
addr string
msg *raft_serverpb.RaftMessage
callback func(error)
}
type recvSnapTask struct {
stream tinykvpb.TinyKv_SnapshotServer
callback func(error)
}
type snapRunner struct {
config *config.Config
snapManager *snap.SnapManager
router message.RaftRouter
}
func newSnapRunner(snapManager *snap.SnapManager, config *config.Config, router message.RaftRouter) *snapRunner {
return &snapRunner{
config: config,
snapManager: snapManager,
router: router,
}
}
func (r *snapRunner) Handle(t worker.Task) {
switch t.(type) {
case *sendSnapTask:
r.send(t.(*sendSnapTask))
case *recvSnapTask:
r.recv(t.(*recvSnapTask))
}
}
func (r *snapRunner) send(t *sendSnapTask) {
t.callback(r.sendSnap(t.addr, t.msg))
}
const snapChunkLen = 1024 * 1024
func (r *snapRunner) sendSnap(addr string, msg *raft_serverpb.RaftMessage) error {
start := time.Now()
msgSnap := msg.GetMessage().GetSnapshot()
snapKey, err := snap.SnapKeyFromSnap(msgSnap)
if err != nil {
return err
}
r.snapManager.Register(snapKey, snap.SnapEntrySending)
defer r.snapManager.Deregister(snapKey, snap.SnapEntrySending)
snap, err := r.snapManager.GetSnapshotForSending(snapKey)
if err != nil {
return err
}
if !snap.Exists() {
return errors.Errorf("missing snap file: %v", snap.Path())
}
cc, err := grpc.Dial(addr, grpc.WithInsecure(),
grpc.WithInitialWindowSize(2*1024*1024),
grpc.WithKeepaliveParams(keepalive.ClientParameters{
Time: 3 * time.Second,
Timeout: 60 * time.Second,
}))
if err != nil {
return err
}
client := tinykvpb.NewTinyKvClient(cc)
stream, err := client.Snapshot(context.TODO())
if err != nil {
return err
}
err = stream.Send(&raft_serverpb.SnapshotChunk{Message: msg})
if err != nil {
return err
}
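// Stream the snapshot file in fixed-size chunks; the final chunk is shrunk to the
// remaining bytes so that exactly snap.TotalSize() bytes are sent in total.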
buf := make([]byte, snapChunkLen)
for remain := snap.TotalSize(); remain > 0; remain -= uint64(len(buf)) {
if remain < uint64(len(buf)) {
buf = buf[:remain]
}
_, err := io.ReadFull(snap, buf)
if err != nil {
return errors.Errorf("failed to read snapshot chunk: %v", err)
}
err = stream.Send(&raft_serverpb.SnapshotChunk{Data: buf})
if err != nil {
return err
}
}
_, err = stream.CloseAndRecv()
if err != nil {
return err
}
log.Infof("sent snapshot. regionID: %v, snapKey: %v, size: %v, duration: %s", snapKey.RegionID, snapKey, snap.TotalSize(), time.Since(start))
return nil
}
func (r *snapRunner) recv(t *recvSnapTask) {
msg, err := r.recvSnap(t.stream)
if err == nil {
r.router.SendRaftMessage(msg)
}
t.callback(err)
}
func (r *snapRunner) recvSnap(stream tinykvpb.TinyKv_SnapshotServer) (*raft_serverpb.RaftMessage, error) {
head, err := stream.Recv()
if err != nil {
return nil, err
}
if head.GetMessage() == nil {
return nil, errors.New("no raft message in the first chunk")
}
message := head.GetMessage().GetMessage()
snapKey, err := snap.SnapKeyFromSnap(message.GetSnapshot())
if err != nil {
return nil, errors.Errorf("failed to create snap key: %v", err)
}
data := message.GetSnapshot().GetData()
snapshot, err := r.snapManager.GetSnapshotForReceiving(snapKey, data)
if err != nil {
return nil, errors.Errorf("%v failed to create snapshot file: %v", snapKey, err)
}
if snapshot.Exists() {
log.Infof("snapshot file already exists, skip receiving. snapKey: %v, file: %v", snapKey, snapshot.Path())
stream.SendAndClose(&raft_serverpb.Done{})
return head.GetMessage(), nil
}
r.snapManager.Register(snapKey, snap.SnapEntryReceiving)
defer r.snapManager.Deregister(snapKey, snap.SnapEntryReceiving)
for {
chunk, err := stream.Recv()
if err != nil {
if err == io.EOF {
break
}
return nil, err
}
data := chunk.GetData()
if len(data) == 0 {
return nil, errors.Errorf("%v receive chunk with empty data", snapKey)
}
_, err = bytes.NewReader(data).WriteTo(snapshot)
if err != nil {
return nil, errors.Errorf("%v failed to write snapshot file %v: %v", snapKey, snapshot.Path(), err)
}
}
err = snapshot.Save()
if err != nil {
return nil, err
}
stream.SendAndClose(&raft_serverpb.Done{})
return head.GetMessage(), nil
}

View File

@ -0,0 +1,95 @@
package raft_storage
import (
"sync"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/util/worker"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
)
type ServerTransport struct {
raftClient *RaftClient
raftRouter message.RaftRouter
resolverScheduler chan<- worker.Task
snapScheduler chan<- worker.Task
resolving sync.Map
}
func NewServerTransport(raftClient *RaftClient, snapScheduler chan<- worker.Task, raftRouter message.RaftRouter, resolverScheduler chan<- worker.Task) *ServerTransport {
return &ServerTransport{
raftClient: raftClient,
raftRouter: raftRouter,
resolverScheduler: resolverScheduler,
snapScheduler: snapScheduler,
}
}
func (t *ServerTransport) Send(msg *raft_serverpb.RaftMessage) error {
storeID := msg.GetToPeer().GetStoreId()
t.SendStore(storeID, msg)
return nil
}
func (t *ServerTransport) SendStore(storeID uint64, msg *raft_serverpb.RaftMessage) {
addr := t.raftClient.GetAddr(storeID)
if addr != "" {
t.WriteData(storeID, addr, msg)
return
}
if _, ok := t.resolving.Load(storeID); ok {
log.Debugf("store address is being resolved, msg dropped. storeID: %v, msg: %s", storeID, msg)
return
}
log.Debug("begin to resolve store address. storeID: %v", storeID)
t.resolving.Store(storeID, struct{}{})
t.Resolve(storeID, msg)
}
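// Resolve schedules an asynchronous address lookup on the resolver worker. On success the
// callback caches the address in raftClient and sends the pending message; on failure the
// message is dropped and a later Send will trigger resolution again.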
func (t *ServerTransport) Resolve(storeID uint64, msg *raft_serverpb.RaftMessage) {
callback := func(addr string, err error) {
// clear resolving
t.resolving.Delete(storeID)
if err != nil {
log.Errorf("resolve store address failed. storeID: %v, err: %v", storeID, err)
return
}
t.raftClient.InsertAddr(storeID, addr)
t.WriteData(storeID, addr, msg)
t.raftClient.Flush()
}
t.resolverScheduler <- &resolveAddrTask{
storeID: storeID,
callback: callback,
}
}
func (t *ServerTransport) WriteData(storeID uint64, addr string, msg *raft_serverpb.RaftMessage) {
if msg.GetMessage().GetSnapshot() != nil {
t.SendSnapshotSock(addr, msg)
return
}
if err := t.raftClient.Send(storeID, addr, msg); err != nil {
log.Errorf("send raft msg err. err: %v", err)
}
}
func (t *ServerTransport) SendSnapshotSock(addr string, msg *raft_serverpb.RaftMessage) {
callback := func(err error) {
regionID := msg.GetRegionId()
toPeerID := msg.GetToPeer().GetId()
toStoreID := msg.GetToPeer().GetStoreId()
log.Debugf("send snapshot. toPeerID: %v, toStoreID: %v, regionID: %v, status: %v", toPeerID, toStoreID, regionID, err)
}
t.snapScheduler <- &sendSnapTask{
addr: addr,
msg: msg,
callback: callback,
}
}
func (t *ServerTransport) Flush() {
t.raftClient.Flush()
}

View File

@ -0,0 +1,38 @@
package standalone_storage
import (
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
)
// StandAloneStorage is an implementation of `Storage` for a single-node TinyKV instance. It does not
// communicate with other nodes and all data is stored locally.
type StandAloneStorage struct {
// Your Data Here (1).
}
func NewStandAloneStorage(conf *config.Config) *StandAloneStorage {
// Your Code Here (1).
return nil
}
func (s *StandAloneStorage) Start() error {
// Your Code Here (1).
return nil
}
func (s *StandAloneStorage) Stop() error {
// Your Code Here (1).
return nil
}
func (s *StandAloneStorage) Reader(ctx *kvrpcpb.Context) (storage.StorageReader, error) {
// Your Code Here (1).
return nil, nil
}
func (s *StandAloneStorage) Write(ctx *kvrpcpb.Context, batch []storage.Modify) error {
// Your Code Here (1).
return nil
}

21
kv/storage/storage.go Normal file
View File

@ -0,0 +1,21 @@
package storage
import (
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
)
// Storage represents the internal-facing server part of TinyKV; it handles sending and receiving from other
// TinyKV nodes. As part of that responsibility, it also reads and writes data to disk (or semi-permanent memory).
type Storage interface {
Start() error
Stop() error
Write(ctx *kvrpcpb.Context, batch []Modify) error
Reader(ctx *kvrpcpb.Context) (StorageReader, error)
}
type StorageReader interface {
GetCF(cf string, key []byte) ([]byte, error)
IterCF(cf string) engine_util.DBIterator
Close()
}
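
To make the contract concrete, here is a hypothetical read helper (not part of the framework) written purely against the interface, so any implementation, standalone or Raft-backed, can be plugged in:

```go
package storage

import "github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"

// GetOne is a hypothetical convenience wrapper: it acquires a reader, fetches a single
// key from the given column family, and releases the reader's resources.
func GetOne(s Storage, ctx *kvrpcpb.Context, cf string, key []byte) ([]byte, error) {
    reader, err := s.Reader(ctx)
    if err != nil {
        return nil, err
    }
    defer reader.Close()
    return reader.GetCF(cf, key)
}
```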

View File

@ -0,0 +1,466 @@
package test_raftstore
import (
"bytes"
"context"
"encoding/hex"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"time"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore"
"github.com/pingcap-incubator/tinykv/kv/storage/raft_storage"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
)
type Simulator interface {
RunStore(raftConf *config.Config, engine *engine_util.Engines, ctx context.Context) error
StopStore(storeID uint64)
AddFilter(filter Filter)
ClearFilters()
GetStoreIds() []uint64
CallCommandOnStore(storeID uint64, request *raft_cmdpb.RaftCmdRequest, timeout time.Duration) (*raft_cmdpb.RaftCmdResponse, *badger.Txn)
}
type Cluster struct {
schedulerClient *MockSchedulerClient
count int
engines map[uint64]*engine_util.Engines
snapPaths map[uint64]string
dirs []string
simulator Simulator
cfg *config.Config
}
func NewCluster(count int, schedulerClient *MockSchedulerClient, simulator Simulator, cfg *config.Config) *Cluster {
return &Cluster{
count: count,
schedulerClient: schedulerClient,
engines: make(map[uint64]*engine_util.Engines),
snapPaths: make(map[uint64]string),
simulator: simulator,
cfg: cfg,
}
}
func (c *Cluster) Start() {
ctx := context.TODO()
clusterID := c.schedulerClient.GetClusterID(ctx)
for storeID := uint64(1); storeID <= uint64(c.count); storeID++ {
dbPath, err := ioutil.TempDir("", "test-raftstore")
if err != nil {
panic(err)
}
c.cfg.DBPath = dbPath
kvPath := filepath.Join(dbPath, "kv")
raftPath := filepath.Join(dbPath, "raft")
snapPath := filepath.Join(dbPath, "snap")
c.snapPaths[storeID] = snapPath
c.dirs = append(c.dirs, []string{kvPath, raftPath, snapPath}...)
err = os.MkdirAll(kvPath, os.ModePerm)
if err != nil {
panic(err)
}
err = os.MkdirAll(raftPath, os.ModePerm)
if err != nil {
panic(err)
}
err = os.MkdirAll(snapPath, os.ModePerm)
if err != nil {
panic(err)
}
raftDB := engine_util.CreateDB("raft", c.cfg)
kvDB := engine_util.CreateDB("kv", c.cfg)
engine := engine_util.NewEngines(kvDB, raftDB, kvPath, raftPath)
c.engines[storeID] = engine
}
regionEpoch := &metapb.RegionEpoch{
Version: raftstore.InitEpochVer,
ConfVer: raftstore.InitEpochConfVer,
}
firstRegion := &metapb.Region{
Id: 1,
StartKey: []byte{},
EndKey: []byte{},
RegionEpoch: regionEpoch,
}
for storeID, engine := range c.engines {
peer := NewPeer(storeID, storeID)
firstRegion.Peers = append(firstRegion.Peers, peer)
err := raftstore.BootstrapStore(engine, clusterID, storeID)
if err != nil {
panic(err)
}
}
for _, engine := range c.engines {
raftstore.PrepareBootstrapCluster(engine, firstRegion)
}
store := &metapb.Store{
Id: 1,
Address: "",
}
resp, err := c.schedulerClient.Bootstrap(context.TODO(), store)
if err != nil {
panic(err)
}
if resp.Header != nil && resp.Header.Error != nil {
panic(resp.Header.Error)
}
for storeID, engine := range c.engines {
store := &metapb.Store{
Id: storeID,
Address: "",
}
err := c.schedulerClient.PutStore(context.TODO(), store)
if err != nil {
panic(err)
}
raftstore.ClearPrepareBootstrapState(engine)
}
for storeID := range c.engines {
c.StartServer(storeID)
}
}
func (c *Cluster) Shutdown() {
for _, storeID := range c.simulator.GetStoreIds() {
c.simulator.StopStore(storeID)
}
for _, engine := range c.engines {
engine.Close()
}
for _, dir := range c.dirs {
os.RemoveAll(dir)
}
}
func (c *Cluster) AddFilter(filter Filter) {
c.simulator.AddFilter(filter)
}
func (c *Cluster) ClearFilters() {
c.simulator.ClearFilters()
}
func (c *Cluster) StopServer(storeID uint64) {
c.simulator.StopStore(storeID)
}
func (c *Cluster) StartServer(storeID uint64) {
engine := c.engines[storeID]
err := c.simulator.RunStore(c.cfg, engine, context.TODO())
if err != nil {
panic(err)
}
}
func (c *Cluster) AllocPeer(storeID uint64) *metapb.Peer {
id, err := c.schedulerClient.AllocID(context.TODO())
if err != nil {
panic(err)
}
return NewPeer(storeID, id)
}
func (c *Cluster) Request(key []byte, reqs []*raft_cmdpb.Request, timeout time.Duration) (*raft_cmdpb.RaftCmdResponse, *badger.Txn) {
startTime := time.Now()
for i := 0; i < 10 || time.Now().Sub(startTime) < timeout; i++ {
region := c.GetRegion(key)
regionID := region.GetId()
req := NewRequest(regionID, region.RegionEpoch, reqs)
resp, txn := c.CallCommandOnLeader(&req, timeout)
if resp == nil {
// the request should have timed out internally
SleepMS(100)
continue
}
if resp.Header.Error != nil {
SleepMS(100)
continue
}
return resp, txn
}
panic("request timeout")
}
func (c *Cluster) CallCommand(request *raft_cmdpb.RaftCmdRequest, timeout time.Duration) (*raft_cmdpb.RaftCmdResponse, *badger.Txn) {
storeID := request.Header.Peer.StoreId
return c.simulator.CallCommandOnStore(storeID, request, timeout)
}
func (c *Cluster) CallCommandOnLeader(request *raft_cmdpb.RaftCmdRequest, timeout time.Duration) (*raft_cmdpb.RaftCmdResponse, *badger.Txn) {
startTime := time.Now()
regionID := request.Header.RegionId
leader := c.LeaderOfRegion(regionID)
for {
if time.Now().Sub(startTime) > timeout {
return nil, nil
}
if leader == nil {
panic(fmt.Sprintf("can't get leader of region %d", regionID))
}
request.Header.Peer = leader
resp, txn := c.CallCommand(request, 1*time.Second)
if resp == nil {
log.Warnf("can't call command %s on leader %d of region %d", request.String(), leader.GetId(), regionID)
newLeader := c.LeaderOfRegion(regionID)
if leader == newLeader {
region, _, err := c.schedulerClient.GetRegionByID(context.TODO(), regionID)
if err != nil {
return nil, nil
}
peers := region.GetPeers()
leader = peers[rand.Int()%len(peers)]
log.Debugf("leader info maybe wrong, use random leader %d of region %d", leader.GetId(), regionID)
} else {
leader = newLeader
log.Debugf("use new leader %d of region %d", leader.GetId(), regionID)
}
continue
}
if resp.Header.Error != nil {
err := resp.Header.Error
if err.GetStaleCommand() != nil || err.GetEpochNotMatch() != nil || err.GetNotLeader() != nil {
log.Debugf("encouter retryable err %+v", resp)
if err.GetNotLeader() != nil && err.GetNotLeader().Leader != nil {
leader = err.GetNotLeader().Leader
} else {
leader = c.LeaderOfRegion(regionID)
}
continue
}
}
return resp, txn
}
}
func (c *Cluster) LeaderOfRegion(regionID uint64) *metapb.Peer {
for i := 0; i < 500; i++ {
_, leader, err := c.schedulerClient.GetRegionByID(context.TODO(), regionID)
if err == nil && leader != nil {
return leader
}
SleepMS(10)
}
return nil
}
func (c *Cluster) GetRegion(key []byte) *metapb.Region {
for i := 0; i < 100; i++ {
region, _, _ := c.schedulerClient.GetRegion(context.TODO(), key)
if region != nil {
return region
}
// We may encounter a range gap after a split, so
// retry getting the region.
SleepMS(20)
}
panic(fmt.Sprintf("find no region for %s", hex.EncodeToString(key)))
}
func (c *Cluster) GetRandomRegion() *metapb.Region {
return c.schedulerClient.getRandomRegion()
}
func (c *Cluster) GetStoreIdsOfRegion(regionID uint64) []uint64 {
region, _, err := c.schedulerClient.GetRegionByID(context.TODO(), regionID)
if err != nil {
panic(err)
}
peers := region.GetPeers()
storeIds := make([]uint64, len(peers))
for i, peer := range peers {
storeIds[i] = peer.GetStoreId()
}
return storeIds
}
func (c *Cluster) MustPut(key, value []byte) {
c.MustPutCF(engine_util.CfDefault, key, value)
}
func (c *Cluster) MustPutCF(cf string, key, value []byte) {
req := NewPutCfCmd(cf, key, value)
resp, _ := c.Request(key, []*raft_cmdpb.Request{req}, 5*time.Second)
if resp.Header.Error != nil {
panic(resp.Header.Error)
}
if len(resp.Responses) != 1 {
panic("len(resp.Responses) != 1")
}
if resp.Responses[0].CmdType != raft_cmdpb.CmdType_Put {
panic("resp.Responses[0].CmdType != raft_cmdpb.CmdType_Put")
}
}
func (c *Cluster) MustGet(key []byte, value []byte) {
v := c.Get(key)
if !bytes.Equal(v, value) {
panic(fmt.Sprintf("expected value %s, but got %s", value, v))
}
}
func (c *Cluster) Get(key []byte) []byte {
return c.GetCF(engine_util.CfDefault, key)
}
func (c *Cluster) GetCF(cf string, key []byte) []byte {
req := NewGetCfCmd(cf, key)
resp, _ := c.Request(key, []*raft_cmdpb.Request{req}, 5*time.Second)
if resp.Header.Error != nil {
panic(resp.Header.Error)
}
if len(resp.Responses) != 1 {
panic("len(resp.Responses) != 1")
}
if resp.Responses[0].CmdType != raft_cmdpb.CmdType_Get {
panic("resp.Responses[0].CmdType != raft_cmdpb.CmdType_Get")
}
return resp.Responses[0].Get.Value
}
func (c *Cluster) MustDelete(key []byte) {
c.MustDeleteCF(engine_util.CfDefault, key)
}
func (c *Cluster) MustDeleteCF(cf string, key []byte) {
req := NewDeleteCfCmd(cf, key)
resp, _ := c.Request(key, []*raft_cmdpb.Request{req}, 5*time.Second)
if resp.Header.Error != nil {
panic(resp.Header.Error)
}
if len(resp.Responses) != 1 {
panic("len(resp.Responses) != 1")
}
if resp.Responses[0].CmdType != raft_cmdpb.CmdType_Delete {
panic("resp.Responses[0].CmdType != raft_cmdpb.CmdType_Delete")
}
}
func (c *Cluster) Scan(start, end []byte) [][]byte {
req := NewSnapCmd()
values := make([][]byte, 0)
key := start
for (len(end) != 0 && bytes.Compare(key, end) < 0) || (len(key) == 0 && len(end) == 0) {
resp, txn := c.Request(key, []*raft_cmdpb.Request{req}, 5*time.Second)
if resp.Header.Error != nil {
panic(resp.Header.Error)
}
if len(resp.Responses) != 1 {
panic("len(resp.Responses) != 1")
}
if resp.Responses[0].CmdType != raft_cmdpb.CmdType_Snap {
panic("resp.Responses[0].CmdType != raft_cmdpb.CmdType_Snap")
}
region := resp.Responses[0].GetSnap().Region
iter := raft_storage.NewRegionReader(txn, *region).IterCF(engine_util.CfDefault)
for iter.Seek(key); iter.Valid(); iter.Next() {
if engine_util.ExceedEndKey(iter.Item().Key(), end) {
break
}
value, err := iter.Item().ValueCopy(nil)
if err != nil {
panic(err)
}
values = append(values, value)
}
key = region.EndKey
if len(key) == 0 {
break
}
}
return values
}
func (c *Cluster) TransferLeader(regionID uint64, leader *metapb.Peer) {
region, _, err := c.schedulerClient.GetRegionByID(context.TODO(), regionID)
if err != nil {
panic(err)
}
epoch := region.RegionEpoch
transferLeader := NewAdminRequest(regionID, epoch, NewTransferLeaderCmd(leader))
resp, _ := c.CallCommandOnLeader(transferLeader, 5*time.Second)
if resp.AdminResponse.CmdType != raft_cmdpb.AdminCmdType_TransferLeader {
panic("resp.AdminResponse.CmdType != raft_cmdpb.AdminCmdType_TransferLeader")
}
}
func (c *Cluster) MustTransferLeader(regionID uint64, leader *metapb.Peer) {
timer := time.Now()
for {
currentLeader := c.LeaderOfRegion(regionID)
if currentLeader.Id == leader.Id &&
currentLeader.StoreId == leader.StoreId {
return
}
if time.Now().Sub(timer) > 5*time.Second {
panic(fmt.Sprintf("failed to transfer leader to [%d] %s", regionID, leader.String()))
}
c.TransferLeader(regionID, leader)
}
}
func (c *Cluster) MustAddPeer(regionID uint64, peer *metapb.Peer) {
c.schedulerClient.AddPeer(regionID, peer)
c.MustHavePeer(regionID, peer)
}
func (c *Cluster) MustRemovePeer(regionID uint64, peer *metapb.Peer) {
c.schedulerClient.RemovePeer(regionID, peer)
c.MustNonePeer(regionID, peer)
}
func (c *Cluster) MustHavePeer(regionID uint64, peer *metapb.Peer) {
for i := 0; i < 500; i++ {
region, _, err := c.schedulerClient.GetRegionByID(context.TODO(), regionID)
if err != nil {
panic(err)
}
if region != nil {
if p := FindPeer(region, peer.GetStoreId()); p != nil {
if p.GetId() == peer.GetId() {
return
}
}
}
SleepMS(10)
}
}
func (c *Cluster) MustNonePeer(regionID uint64, peer *metapb.Peer) {
for i := 0; i < 500; i++ {
region, _, err := c.schedulerClient.GetRegionByID(context.TODO(), regionID)
if err != nil {
panic(err)
}
if region != nil {
if p := FindPeer(region, peer.GetStoreId()); p != nil {
if p.GetId() != peer.GetId() {
return
}
} else {
return
}
}
SleepMS(10)
}
}

View File

@ -0,0 +1,45 @@
package test_raftstore
import (
"math/rand"
rspb "github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
)
type Filter interface {
Before(msgs *rspb.RaftMessage) bool
After()
}
type PartitionFilter struct {
s1 []uint64
s2 []uint64
}
func (f *PartitionFilter) Before(msg *rspb.RaftMessage) bool {
inS1 := false
inS2 := false
for _, storeID := range f.s1 {
if msg.FromPeer.StoreId == storeID || msg.ToPeer.StoreId == storeID {
inS1 = true
break
}
}
for _, storeID := range f.s2 {
if msg.FromPeer.StoreId == storeID || msg.ToPeer.StoreId == storeID {
inS2 = true
break
}
}
return !(inS1 && inS2)
}
func (f *PartitionFilter) After() {}
type DropFilter struct{}
func (f *DropFilter) Before(msg *rspb.RaftMessage) bool {
return (rand.Int() % 1000) > 100
}
func (f *DropFilter) After() {}
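
As a usage illustration, a hypothetical test helper (not part of the framework) that installs a PartitionFilter on a cluster and returns a function to heal the partition:

```go
package test_raftstore

// isolateStores is a hypothetical helper: it partitions the stores in s1 from those in s2
// by installing a PartitionFilter, and returns a func that removes all filters again.
func isolateStores(c *Cluster, s1, s2 []uint64) (heal func()) {
    c.AddFilter(&PartitionFilter{s1: s1, s2: s2})
    return c.ClearFilters
}
```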

218
kv/test_raftstore/node.go Normal file
View File

@ -0,0 +1,218 @@
package test_raftstore
import (
"context"
"errors"
"fmt"
"io"
"sync"
"time"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore"
"github.com/pingcap-incubator/tinykv/kv/raftstore/message"
"github.com/pingcap-incubator/tinykv/kv/raftstore/scheduler_client"
"github.com/pingcap-incubator/tinykv/kv/raftstore/snap"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_serverpb"
)
type MockTransport struct {
sync.RWMutex
filters []Filter
routers map[uint64]message.RaftRouter
snapMgrs map[uint64]*snap.SnapManager
}
func NewMockTransport() *MockTransport {
return &MockTransport{
routers: make(map[uint64]message.RaftRouter),
snapMgrs: make(map[uint64]*snap.SnapManager),
}
}
func (t *MockTransport) AddStore(storeID uint64, raftRouter message.RaftRouter, snapMgr *snap.SnapManager) {
t.Lock()
defer t.Unlock()
t.routers[storeID] = raftRouter
t.snapMgrs[storeID] = snapMgr
}
func (t *MockTransport) RemoveStore(storeID uint64) {
t.Lock()
defer t.Unlock()
delete(t.routers, storeID)
delete(t.snapMgrs, storeID)
}
func (t *MockTransport) AddFilter(filter Filter) {
t.Lock()
defer t.Unlock()
t.filters = append(t.filters, filter)
}
func (t *MockTransport) ClearFilters() {
t.Lock()
defer t.Unlock()
t.filters = nil
}
func (t *MockTransport) Send(msg *raft_serverpb.RaftMessage) error {
t.RLock()
defer t.RUnlock()
for _, filter := range t.filters {
if !filter.Before(msg) {
return errors.New(fmt.Sprintf("message %+v is dropped", msg))
}
}
fromStore := msg.GetFromPeer().GetStoreId()
toStore := msg.GetToPeer().GetStoreId()
isSnapshot := msg.GetMessage().GetMsgType() == eraftpb.MessageType_MsgSnapshot
if isSnapshot {
snapshot := msg.Message.Snapshot
key, err := snap.SnapKeyFromSnap(snapshot)
if err != nil {
return err
}
fromSnapMgr, found := t.snapMgrs[fromStore]
if !found {
return errors.New(fmt.Sprintf("store %d is closed", fromStore))
}
fromSnapMgr.Register(key, snap.SnapEntrySending)
fromSnap, err := fromSnapMgr.GetSnapshotForSending(key)
if err != nil {
return err
}
toSnapMgr, found := t.snapMgrs[toStore]
if !found {
return errors.New(fmt.Sprintf("store %d is closed", toStore))
}
toSnapMgr.Register(key, snap.SnapEntryReceiving)
toSnap, err := toSnapMgr.GetSnapshotForReceiving(key, snapshot.GetData())
if err != nil {
return err
}
io.Copy(toSnap, fromSnap)
toSnap.Save()
toSnapMgr.Deregister(key, snap.SnapEntryReceiving)
fromSnapMgr.Deregister(key, snap.SnapEntrySending)
}
router, found := t.routers[toStore]
if !found {
return errors.New(fmt.Sprintf("store %d is closed", toStore))
}
router.SendRaftMessage(msg)
for _, filter := range t.filters {
filter.After()
}
return nil
}
type NodeSimulator struct {
sync.RWMutex
trans *MockTransport
schedulerClient scheduler_client.Client
nodes map[uint64]*raftstore.Node
}
func NewNodeSimulator(schedulerClient scheduler_client.Client) *NodeSimulator {
trans := NewMockTransport()
return &NodeSimulator{
trans: trans,
schedulerClient: schedulerClient,
nodes: make(map[uint64]*raftstore.Node),
}
}
func (c *NodeSimulator) RunStore(cfg *config.Config, engine *engine_util.Engines, ctx context.Context) error {
c.Lock()
defer c.Unlock()
raftRouter, batchSystem := raftstore.CreateRaftBatchSystem(cfg)
snapManager := snap.NewSnapManager(cfg.DBPath + "/snap")
node := raftstore.NewNode(batchSystem, cfg, c.schedulerClient)
err := node.Start(ctx, engine, c.trans, snapManager)
if err != nil {
return err
}
storeID := node.GetStoreID()
c.nodes[storeID] = node
c.trans.AddStore(storeID, raftRouter, snapManager)
return nil
}
func (c *NodeSimulator) StopStore(storeID uint64) {
c.Lock()
defer c.Unlock()
node := c.nodes[storeID]
if node == nil {
panic(fmt.Sprintf("Can not find store %d", storeID))
}
node.Stop()
delete(c.nodes, storeID)
c.trans.RemoveStore(storeID)
}
func (c *NodeSimulator) AddFilter(filter Filter) {
c.Lock()
defer c.Unlock()
c.trans.AddFilter(filter)
}
func (c *NodeSimulator) ClearFilters() {
c.Lock()
defer c.Unlock()
c.trans.ClearFilters()
}
func (c *NodeSimulator) GetStoreIds() []uint64 {
c.RLock()
defer c.RUnlock()
storeIDs := make([]uint64, 0, len(c.nodes))
for storeID := range c.nodes {
storeIDs = append(storeIDs, storeID)
}
return storeIDs
}
func (c *NodeSimulator) CallCommandOnStore(storeID uint64, request *raft_cmdpb.RaftCmdRequest, timeout time.Duration) (*raft_cmdpb.RaftCmdResponse, *badger.Txn) {
c.RLock()
router := c.trans.routers[storeID]
if router == nil {
log.Fatalf("Can not find node %d", storeID)
}
c.RUnlock()
cb := message.NewCallback()
err := router.SendRaftCommand(request, cb)
if err != nil {
return nil, nil
}
resp := cb.WaitRespWithTimeout(timeout)
return resp, cb.Txn
}

View File

@ -0,0 +1,565 @@
package test_raftstore
import (
"bytes"
"context"
"fmt"
"sync"
"github.com/google/btree"
"github.com/pingcap-incubator/tinykv/kv/raftstore/util"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/eraftpb"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/schedulerpb"
"github.com/pingcap/errors"
)
var _ btree.Item = &regionItem{}
type regionItem struct {
region metapb.Region
}
// Less returns true if the region start key is less than the other.
func (r *regionItem) Less(other btree.Item) bool {
left := r.region.GetStartKey()
right := other.(*regionItem).region.GetStartKey()
return bytes.Compare(left, right) < 0
}
func (r *regionItem) Contains(key []byte) bool {
start, end := r.region.GetStartKey(), r.region.GetEndKey()
return bytes.Compare(key, start) >= 0 && (len(end) == 0 || bytes.Compare(key, end) < 0)
}
type OperatorType int64
const (
OperatorTypeAddPeer = 1
OperatorTypeRemovePeer = 2
OperatorTypeTransferLeader = 3
)
type Operator struct {
Type OperatorType
Data interface{}
}
type OpAddPeer struct {
peer *metapb.Peer
pending bool
}
type OpRemovePeer struct {
peer *metapb.Peer
}
type OpTransferLeader struct {
peer *metapb.Peer
}
type Store struct {
store metapb.Store
heartbeatResponseHandler func(*schedulerpb.RegionHeartbeatResponse)
}
func NewStore(store *metapb.Store) *Store {
return &Store{
store: *store,
heartbeatResponseHandler: nil,
}
}
type MockSchedulerClient struct {
sync.RWMutex
clusterID uint64
meta metapb.Cluster
stores map[uint64]*Store
regionsRange *btree.BTree // key -> region
regionsKey map[uint64][]byte // regionID -> startKey
baseID uint64
operators map[uint64]*Operator
leaders map[uint64]*metapb.Peer // regionID -> peer
pendingPeers map[uint64]*metapb.Peer // peerID -> peer
bootstrapped bool
}
func NewMockSchedulerClient(clusterID uint64, baseID uint64) *MockSchedulerClient {
return &MockSchedulerClient{
clusterID: clusterID,
meta: metapb.Cluster{
Id: clusterID,
},
stores: make(map[uint64]*Store),
regionsRange: btree.New(2),
regionsKey: make(map[uint64][]byte),
baseID: baseID,
operators: make(map[uint64]*Operator),
leaders: make(map[uint64]*metapb.Peer),
pendingPeers: make(map[uint64]*metapb.Peer),
}
}
// Implement SchedulerClient interface
func (m *MockSchedulerClient) GetClusterID(ctx context.Context) uint64 {
m.RLock()
defer m.RUnlock()
return m.clusterID
}
func (m *MockSchedulerClient) AllocID(ctx context.Context) (uint64, error) {
m.Lock()
defer m.Unlock()
ret := m.baseID
m.baseID++
return ret, nil
}
func (m *MockSchedulerClient) Bootstrap(ctx context.Context, store *metapb.Store) (*schedulerpb.BootstrapResponse, error) {
m.Lock()
defer m.Unlock()
resp := &schedulerpb.BootstrapResponse{
Header: &schedulerpb.ResponseHeader{ClusterId: m.clusterID},
}
if m.bootstrapped || len(m.regionsKey) != 0 {
m.bootstrapped = true
resp.Header.Error = &schedulerpb.Error{
Type: schedulerpb.ErrorType_ALREADY_BOOTSTRAPPED,
Message: "cluster is already bootstrapped",
}
return resp, nil
}
m.stores[store.GetId()] = NewStore(store)
m.bootstrapped = true
return resp, nil
}
func (m *MockSchedulerClient) IsBootstrapped(ctx context.Context) (bool, error) {
m.RLock()
defer m.RUnlock()
return m.bootstrapped, nil
}
func (m *MockSchedulerClient) checkBootstrap() error {
if bootstrapped, _ := m.IsBootstrapped(context.TODO()); !bootstrapped {
return errors.New("not bootstrapped")
}
return nil
}
func (m *MockSchedulerClient) PutStore(ctx context.Context, store *metapb.Store) error {
if err := m.checkBootstrap(); err != nil {
return err
}
m.Lock()
defer m.Unlock()
s := NewStore(store)
m.stores[store.GetId()] = s
return nil
}
func (m *MockSchedulerClient) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error) {
if err := m.checkBootstrap(); err != nil {
return nil, err
}
m.RLock()
defer m.RUnlock()
s, ok := m.stores[storeID]
if !ok {
return nil, errors.Errorf("store %d not found", storeID)
}
return &s.store, nil
}
func (m *MockSchedulerClient) GetRegion(ctx context.Context, key []byte) (*metapb.Region, *metapb.Peer, error) {
if err := m.checkBootstrap(); err != nil {
return nil, nil, err
}
m.RLock()
defer m.RUnlock()
region, leader := m.getRegionLocked(key)
return region, leader, nil
}
func (m *MockSchedulerClient) getRegionLocked(key []byte) (*metapb.Region, *metapb.Peer) {
result := m.findRegion(key)
if result == nil {
return nil, nil
}
leader := m.leaders[result.region.GetId()]
return &result.region, leader
}
func (m *MockSchedulerClient) GetRegionByID(ctx context.Context, regionID uint64) (*metapb.Region, *metapb.Peer, error) {
if err := m.checkBootstrap(); err != nil {
return nil, nil, err
}
m.RLock()
defer m.RUnlock()
return m.getRegionByIDLocked(regionID)
}
func (m *MockSchedulerClient) getRegionByIDLocked(regionID uint64) (*metapb.Region, *metapb.Peer, error) {
startKey := m.regionsKey[regionID]
region, leader := m.getRegionLocked(startKey)
return region, leader, nil
}
func (m *MockSchedulerClient) AskSplit(ctx context.Context, region *metapb.Region) (*schedulerpb.AskSplitResponse, error) {
resp := new(schedulerpb.AskSplitResponse)
resp.Header = &schedulerpb.ResponseHeader{ClusterId: m.clusterID}
curRegion, _, err := m.GetRegionByID(ctx, region.GetId())
if err != nil {
return resp, err
}
if util.IsEpochStale(region.RegionEpoch, curRegion.RegionEpoch) {
return resp, errors.New("epoch is stale")
}
id, _ := m.AllocID(ctx)
resp.NewRegionId = id
for range region.GetPeers() {
id, _ := m.AllocID(ctx)
resp.NewPeerIds = append(resp.NewPeerIds, id)
}
return resp, nil
}
func (m *MockSchedulerClient) StoreHeartbeat(ctx context.Context, stats *schedulerpb.StoreStats) error {
if err := m.checkBootstrap(); err != nil {
return err
}
// nothing needs to be done
return nil
}
func (m *MockSchedulerClient) RegionHeartbeat(req *schedulerpb.RegionHeartbeatRequest) error {
if err := m.checkBootstrap(); err != nil {
return err
}
m.Lock()
defer m.Unlock()
regionID := req.Region.GetId()
for _, p := range req.Region.GetPeers() {
delete(m.pendingPeers, p.GetId())
}
for _, p := range req.GetPendingPeers() {
m.pendingPeers[p.GetId()] = p
}
m.leaders[regionID] = req.Leader
if err := m.handleHeartbeatVersion(req.Region); err != nil {
return err
}
if err := m.handleHeartbeatConfVersion(req.Region); err != nil {
return err
}
resp := &schedulerpb.RegionHeartbeatResponse{
Header: &schedulerpb.ResponseHeader{ClusterId: m.clusterID},
RegionId: regionID,
RegionEpoch: req.Region.GetRegionEpoch(),
TargetPeer: req.Leader,
}
if op := m.operators[regionID]; op != nil {
if m.tryFinished(op, req.Region, req.Leader) {
delete(m.operators, regionID)
} else {
m.makeRegionHeartbeatResponse(op, resp)
}
log.Debugf("[region %d] schedule %v", regionID, op)
}
store := m.stores[req.Leader.GetStoreId()]
store.heartbeatResponseHandler(resp)
return nil
}
func (m *MockSchedulerClient) handleHeartbeatVersion(region *metapb.Region) error {
if engine_util.ExceedEndKey(region.GetStartKey(), region.GetEndKey()) {
panic("start key > end key")
}
for {
searchRegion, _ := m.getRegionLocked(region.GetStartKey())
if searchRegion == nil {
m.addRegionLocked(region)
return nil
} else {
if bytes.Equal(searchRegion.GetStartKey(), region.GetStartKey()) &&
bytes.Equal(searchRegion.GetEndKey(), region.GetEndKey()) {
// the two regions' range are same, must check epoch
if util.IsEpochStale(region.RegionEpoch, searchRegion.RegionEpoch) {
return errors.New("epoch is stale")
}
if searchRegion.RegionEpoch.Version < region.RegionEpoch.Version {
m.removeRegionLocked(searchRegion)
m.addRegionLocked(region)
}
return nil
}
if engine_util.ExceedEndKey(searchRegion.GetStartKey(), region.GetEndKey()) {
// No range covers [start, end) now, insert directly.
m.addRegionLocked(region)
return nil
} else {
// overlap, remove old, insert new.
// E.g., 1 [a, c) -> 1 [a, b) + 2 [b, c); whichever of the new regions 1 or 2 reports,
// it overlaps with the original [a, c).
if region.GetRegionEpoch().GetVersion() <= searchRegion.GetRegionEpoch().GetVersion() {
return errors.New("epoch is stale")
}
m.removeRegionLocked(searchRegion)
}
}
}
}
func (m *MockSchedulerClient) handleHeartbeatConfVersion(region *metapb.Region) error {
searchRegion, _ := m.getRegionLocked(region.GetStartKey())
if util.IsEpochStale(region.RegionEpoch, searchRegion.RegionEpoch) {
return errors.New("epoch is stale")
}
regionPeerLen := len(region.GetPeers())
searchRegionPeerLen := len(searchRegion.GetPeers())
if region.RegionEpoch.ConfVer > searchRegion.RegionEpoch.ConfVer {
// If ConfVer changed, TinyKV has added/removed one peer already.
// So the scheduler and TinyKV can't have the same peer count, and they can differ by
// only one peer.
if searchRegionPeerLen > regionPeerLen {
if searchRegionPeerLen-regionPeerLen != 1 {
panic("should only one conf change")
}
fmt.Println(searchRegion, region)
if len(GetDiffPeers(searchRegion, region)) != 1 {
panic("should only one different peer")
}
if len(GetDiffPeers(region, searchRegion)) != 0 {
panic("should include all peers")
}
} else if searchRegionPeerLen < regionPeerLen {
if regionPeerLen-searchRegionPeerLen != 1 {
panic("should only one conf change")
}
if len(GetDiffPeers(region, searchRegion)) != 1 {
panic("should only one different peer")
}
if len(GetDiffPeers(searchRegion, region)) != 0 {
panic("should include all peers")
}
} else {
MustSamePeers(searchRegion, region)
if searchRegion.RegionEpoch.ConfVer+1 != region.RegionEpoch.ConfVer {
panic("unmatched conf version")
}
if searchRegion.RegionEpoch.Version+1 != region.RegionEpoch.Version {
panic("unmatched version")
}
}
// update the region.
if m.regionsRange.ReplaceOrInsert(&regionItem{region: *region}) == nil {
panic("update inexistent region ")
}
} else {
MustSamePeers(searchRegion, region)
}
return nil
}
func (m *MockSchedulerClient) tryFinished(op *Operator, region *metapb.Region, leader *metapb.Peer) bool {
switch op.Type {
case OperatorTypeAddPeer:
add := op.Data.(OpAddPeer)
if !add.pending {
for _, p := range region.GetPeers() {
if add.peer.GetId() == p.GetId() {
add.pending = true
} else {
// TinyKV rejects AddNode.
return false
}
}
} else {
_, found := m.pendingPeers[add.peer.GetId()]
return !found
}
case OperatorTypeRemovePeer:
remove := op.Data.(OpRemovePeer)
for _, p := range region.GetPeers() {
if remove.peer.GetId() == p.GetId() {
return false
}
}
return true
case OperatorTypeTransferLeader:
transfer := op.Data.(OpTransferLeader)
return leader.GetId() == transfer.peer.GetId()
}
panic("unreachable")
}
func (m *MockSchedulerClient) makeRegionHeartbeatResponse(op *Operator, resp *schedulerpb.RegionHeartbeatResponse) {
switch op.Type {
case OperatorTypeAddPeer:
add := op.Data.(OpAddPeer)
if !add.pending {
resp.ChangePeer = &schedulerpb.ChangePeer{
ChangeType: eraftpb.ConfChangeType_AddNode,
Peer: add.peer,
}
}
case OperatorTypeRemovePeer:
remove := op.Data.(OpRemovePeer)
resp.ChangePeer = &schedulerpb.ChangePeer{
ChangeType: eraftpb.ConfChangeType_RemoveNode,
Peer: remove.peer,
}
case OperatorTypeTransferLeader:
transfer := op.Data.(OpTransferLeader)
resp.TransferLeader = &schedulerpb.TransferLeader{
Peer: transfer.peer,
}
}
}
func (m *MockSchedulerClient) SetRegionHeartbeatResponseHandler(storeID uint64, h func(*schedulerpb.RegionHeartbeatResponse)) {
if h == nil {
h = func(*schedulerpb.RegionHeartbeatResponse) {}
}
m.Lock()
defer m.Unlock()
store := m.stores[storeID]
store.heartbeatResponseHandler = h
}
func (m *MockSchedulerClient) Close() {
// do nothing
}
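// findRegion locates the region whose key range contains key: descend the btree from a
// pseudo item {StartKey: key} to the region with the greatest start key <= key, then
// verify via Contains that key falls before that region's end key.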
func (m *MockSchedulerClient) findRegion(key []byte) *regionItem {
item := &regionItem{region: metapb.Region{StartKey: key}}
var result *regionItem
m.regionsRange.DescendLessOrEqual(item, func(i btree.Item) bool {
result = i.(*regionItem)
return false
})
if result == nil || !result.Contains(key) {
return nil
}
return result
}
func (m *MockSchedulerClient) addRegionLocked(region *metapb.Region) {
m.regionsKey[region.GetId()] = region.GetStartKey()
m.regionsRange.ReplaceOrInsert(&regionItem{region: *region})
}
func (m *MockSchedulerClient) removeRegionLocked(region *metapb.Region) {
delete(m.regionsKey, region.GetId())
result := m.findRegion(region.GetStartKey())
if result == nil || result.region.GetId() != region.GetId() {
return
}
m.regionsRange.Delete(result)
}
// Extra API for tests
func (m *MockSchedulerClient) AddPeer(regionID uint64, peer *metapb.Peer) {
m.scheduleOperator(regionID, &Operator{
Type: OperatorTypeAddPeer,
Data: OpAddPeer{
peer: peer,
pending: false,
},
})
}
func (m *MockSchedulerClient) RemovePeer(regionID uint64, peer *metapb.Peer) {
m.scheduleOperator(regionID, &Operator{
Type: OperatorTypeRemovePeer,
Data: OpRemovePeer{
peer: peer,
},
})
}
func (m *MockSchedulerClient) TransferLeader(regionID uint64, peer *metapb.Peer) {
m.scheduleOperator(regionID, &Operator{
Type: OperatorTypeTransferLeader,
Data: OpTransferLeader{
peer: peer,
},
})
}
func (m *MockSchedulerClient) getRandomRegion() *metapb.Region {
m.RLock()
defer m.RUnlock()
for regionID := range m.leaders {
region, _, _ := m.getRegionByIDLocked(regionID)
return region
}
return nil
}
func (m *MockSchedulerClient) scheduleOperator(regionID uint64, op *Operator) {
m.Lock()
defer m.Unlock()
m.operators[regionID] = op
}
// Utilities
func MustSamePeers(left *metapb.Region, right *metapb.Region) {
if len(left.GetPeers()) != len(right.GetPeers()) {
panic("unmatched peers length")
}
for _, p := range left.GetPeers() {
if FindPeer(right, p.GetStoreId()) == nil {
panic("not found the peer")
}
}
}
func GetDiffPeers(left *metapb.Region, right *metapb.Region) []*metapb.Peer {
peers := make([]*metapb.Peer, 0, 1)
for _, p := range left.GetPeers() {
if FindPeer(right, p.GetStoreId()) == nil {
peers = append(peers, p)
}
}
return peers
}
func FindPeer(region *metapb.Region, storeID uint64) *metapb.Peer {
for _, p := range region.GetPeers() {
if p.GetStoreId() == storeID {
return p
}
}
return nil
}

View File

@ -0,0 +1,710 @@
package test_raftstore
import (
"bytes"
"fmt"
"math/rand"
_ "net/http/pprof"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/raftstore/meta"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
"github.com/stretchr/testify/assert"
)
// a client runs the function fn and then signals it is done
func runClient(t *testing.T, me int, ca chan bool, fn func(me int, t *testing.T)) {
ok := false
defer func() { ca <- ok }()
fn(me, t)
ok = true
}
// spawn ncli clients and wait until they are all done
func SpawnClientsAndWait(t *testing.T, ch chan bool, ncli int, fn func(me int, t *testing.T)) {
defer func() { ch <- true }()
ca := make([]chan bool, ncli)
for cli := 0; cli < ncli; cli++ {
ca[cli] = make(chan bool)
go runClient(t, cli, ca[cli], fn)
}
// log.Printf("SpawnClientsAndWait: waiting for clients")
for cli := 0; cli < ncli; cli++ {
ok := <-ca[cli]
// log.Infof("SpawnClientsAndWait: client %d is done\n", cli)
if !ok {
t.Fatalf("failure")
}
}
}
// predict effect of Append(k, val) if old value is prev.
func NextValue(prev string, val string) string {
return prev + val
}
// check that for a specific client all known appends are present in a value,
// and in order
func checkClntAppends(t *testing.T, clnt int, v string, count int) {
lastoff := -1
for j := 0; j < count; j++ {
wanted := "x " + strconv.Itoa(clnt) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("%v missing element %v in Append result %v", clnt, wanted, v)
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
t.Fatalf("duplicate element %v in Append result", wanted)
}
if off <= lastoff {
t.Fatalf("wrong order for element %v in Append result", wanted)
}
lastoff = off
}
}
// check that all known appends are present in a value,
// and are in order for each concurrent client.
func checkConcurrentAppends(t *testing.T, v string, counts []int) {
nclients := len(counts)
for i := 0; i < nclients; i++ {
lastoff := -1
for j := 0; j < counts[i]; j++ {
wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("%v missing element %v in Append result %v", i, wanted, v)
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
t.Fatalf("duplicate element %v in Append result", wanted)
}
if off <= lastoff {
t.Fatalf("wrong order for element %v in Append result", wanted)
}
lastoff = off
}
}
}
// repartition the servers periodically
func partitioner(t *testing.T, cluster *Cluster, ch chan bool, done *int32, unreliable bool, electionTimeout time.Duration) {
defer func() { ch <- true }()
for atomic.LoadInt32(done) == 0 {
a := make([]int, cluster.count)
for i := 0; i < cluster.count; i++ {
a[i] = (rand.Int() % 2)
}
pa := make([][]uint64, 2)
for i := 0; i < 2; i++ {
pa[i] = make([]uint64, 0)
for j := 1; j <= cluster.count; j++ {
if a[j-1] == i {
pa[i] = append(pa[i], uint64(j))
}
}
}
cluster.ClearFilters()
log.Infof("partition: %v, %v", pa[0], pa[1])
cluster.AddFilter(&PartitionFilter{
s1: pa[0],
s2: pa[1],
})
if unreliable {
cluster.AddFilter(&DropFilter{})
}
time.Sleep(electionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond)
}
}
func confchanger(t *testing.T, cluster *Cluster, ch chan bool, done *int32) {
defer func() { ch <- true }()
count := uint64(cluster.count)
for atomic.LoadInt32(done) == 0 {
region := cluster.GetRandomRegion()
store := rand.Uint64()%count + 1
if p := FindPeer(region, store); p != nil {
if len(region.GetPeers()) > 1 {
cluster.MustRemovePeer(region.GetId(), p)
}
} else {
cluster.MustAddPeer(region.GetId(), cluster.AllocPeer(store))
}
time.Sleep(time.Duration(rand.Int63()%200) * time.Millisecond)
}
}
// The basic test is as follows: one or more clients submit Put/Scan
// operations to a set of servers for some period of time. After the period is
// over, the test checks that all sequential values are present and in order for a
// particular key, and performs Delete to clean up.
// - If unreliable is set, RPCs may fail.
// - If crash is set, the servers restart after the period is over.
// - If partitions is set, the test repartitions the network concurrently between the servers.
// - If maxraftlog is a positive number, the count of the persistent Raft log shouldn't exceed 2*maxraftlog.
// - If confchange is set, the cluster schedules random conf changes concurrently.
// - If split is set, regions are split when their size exceeds 1024 bytes.
func GenericTest(t *testing.T, part string, nclients int, unreliable bool, crash bool, partitions bool, maxraftlog int, confchange bool, split bool) {
title := "Test: "
if unreliable {
// the network drops RPC requests and replies.
title = title + "unreliable net, "
}
if crash {
// peers re-start, and thus persistence must work.
title = title + "restarts, "
}
if partitions {
// the network may partition
title = title + "partitions, "
}
if maxraftlog != -1 {
title = title + "snapshots, "
}
if nclients > 1 {
title = title + "many clients"
} else {
title = title + "one client"
}
title = title + " (" + part + ")" // 3A or 3B
nservers := 5
cfg := config.NewTestConfig()
if maxraftlog != -1 {
cfg.RaftLogGcCountLimit = uint64(maxraftlog)
}
if split {
cfg.RegionMaxSize = 800
cfg.RegionSplitSize = 500
}
cluster := NewTestCluster(nservers, cfg)
cluster.Start()
defer cluster.Shutdown()
electionTimeout := cfg.RaftBaseTickInterval * time.Duration(cfg.RaftElectionTimeoutTicks)
done_partitioner := int32(0)
done_confchanger := int32(0)
done_clients := int32(0)
ch_partitioner := make(chan bool)
ch_confchange := make(chan bool)
ch_clients := make(chan bool)
clnts := make([]chan int, nclients)
for i := 0; i < nclients; i++ {
clnts[i] = make(chan int, 1)
}
for i := 0; i < 3; i++ {
// log.Printf("Iteration %v\n", i)
atomic.StoreInt32(&done_clients, 0)
atomic.StoreInt32(&done_partitioner, 0)
go SpawnClientsAndWait(t, ch_clients, nclients, func(cli int, t *testing.T) {
j := 0
defer func() {
clnts[cli] <- j
}()
last := ""
for atomic.LoadInt32(&done_clients) == 0 {
if (rand.Int() % 1000) < 500 {
key := strconv.Itoa(cli) + " " + fmt.Sprintf("%08d", j)
value := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j) + " y"
// log.Infof("%d: client new put %v,%v\n", cli, key, value)
cluster.MustPut([]byte(key), []byte(value))
last = NextValue(last, value)
j++
} else {
start := strconv.Itoa(cli) + " " + fmt.Sprintf("%08d", 0)
end := strconv.Itoa(cli) + " " + fmt.Sprintf("%08d", j)
// log.Infof("%d: client new scan %v-%v\n", cli, start, end)
values := cluster.Scan([]byte(start), []byte(end))
v := string(bytes.Join(values, []byte("")))
if v != last {
log.Fatalf("get wrong value, client %v\nwant:%v\ngot: %v\n", cli, last, v)
}
}
}
})
if partitions {
// Allow the clients to perform some operations without interruption
time.Sleep(300 * time.Millisecond)
go partitioner(t, cluster, ch_partitioner, &done_partitioner, unreliable, electionTimeout)
}
if confchange {
// Allow the clients to perform some operations without interruption
time.Sleep(100 * time.Millisecond)
go confchanger(t, cluster, ch_confchange, &done_confchanger)
}
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done_clients, 1) // tell clients to quit
atomic.StoreInt32(&done_partitioner, 1) // tell partitioner to quit
atomic.StoreInt32(&done_confchanger, 1) // tell confchanger to quit
if partitions {
// log.Printf("wait for partitioner\n")
<-ch_partitioner
// reconnect network and submit a request. A client may
// have submitted a request in a minority. That request
// won't return until that server discovers a new term
// has started.
cluster.ClearFilters()
// wait for a while so that we have a new term
time.Sleep(electionTimeout)
}
// log.Printf("wait for clients\n")
<-ch_clients
if crash {
log.Warnf("shutdown servers\n")
for i := 1; i <= nservers; i++ {
cluster.StopServer(uint64(i))
}
// Wait for a while for servers to shutdown, since
// shutdown isn't a real crash and isn't instantaneous
time.Sleep(electionTimeout)
log.Warnf("restart servers\n")
// crash and re-start all
for i := 1; i <= nservers; i++ {
cluster.StartServer(uint64(i))
}
}
for cli := 0; cli < nclients; cli++ {
// log.Printf("read from clients %d\n", cli)
j := <-clnts[cli]
// if j < 10 {
// log.Printf("Warning: client %d managed to perform only %d put operations in 1 sec?\n", i, j)
// }
start := strconv.Itoa(cli) + " " + fmt.Sprintf("%08d", 0)
end := strconv.Itoa(cli) + " " + fmt.Sprintf("%08d", j)
values := cluster.Scan([]byte(start), []byte(end))
v := string(bytes.Join(values, []byte("")))
checkClntAppends(t, cli, v, j)
for k := 0; k < j; k++ {
key := strconv.Itoa(cli) + " " + fmt.Sprintf("%08d", k)
cluster.MustDelete([]byte(key))
}
}
if maxraftlog > 0 {
// Check maximum after the servers have processed all client
// requests and had time to checkpoint.
key := []byte("")
for {
region := cluster.GetRegion(key)
if region == nil {
panic("region is not found")
}
for _, engine := range cluster.engines {
state, err := meta.GetApplyState(engine.Kv, region.GetId())
if err == badger.ErrKeyNotFound {
continue
}
if err != nil {
panic(err)
}
truncatedIdx := state.TruncatedState.Index
appliedIdx := state.AppliedIndex
if appliedIdx-truncatedIdx > 2*uint64(maxraftlog) {
t.Fatalf("logs were not trimmed (%v - %v > 2*%v)", appliedIdx, truncatedIdx, maxraftlog)
}
}
key = region.EndKey
if len(key) == 0 {
break
}
}
}
if split {
r := cluster.GetRegion([]byte(""))
if len(r.GetEndKey()) == 0 {
t.Fatalf("region is not split")
}
}
}
}
func TestBasic2B(t *testing.T) {
// Test: one client (2B) ...
GenericTest(t, "2B", 1, false, false, false, -1, false, false)
}
func TestConcurrent2B(t *testing.T) {
// Test: many clients (2B) ...
GenericTest(t, "2B", 5, false, false, false, -1, false, false)
}
func TestUnreliable2B(t *testing.T) {
// Test: unreliable net, many clients (2B) ...
GenericTest(t, "2B", 5, true, false, false, -1, false, false)
}
// Submit a request in the minority partition and check that the request
// doesn't go through until the partition heals. The leader in the original
// network ends up in the minority partition.
func TestOnePartition2B(t *testing.T) {
cfg := config.NewTestConfig()
cluster := NewTestCluster(5, cfg)
cluster.Start()
defer cluster.Shutdown()
region := cluster.GetRegion([]byte(""))
leader := cluster.LeaderOfRegion(region.GetId())
s1 := []uint64{leader.GetStoreId()}
s2 := []uint64{}
for _, p := range region.GetPeers() {
if p.GetId() == leader.GetId() {
continue
}
if len(s1) < 3 {
s1 = append(s1, p.GetStoreId())
} else {
s2 = append(s2, p.GetStoreId())
}
}
// leader in majority, partition doesn't affect write/read
cluster.AddFilter(&PartitionFilter{
s1: s1,
s2: s2,
})
cluster.MustPut([]byte("k1"), []byte("v1"))
cluster.MustGet([]byte("k1"), []byte("v1"))
MustGetNone(cluster.engines[s2[0]], []byte("k1"))
MustGetNone(cluster.engines[s2[1]], []byte("k1"))
cluster.ClearFilters()
// old leader in minority, new leader should be elected
s2 = append(s2, s1[2])
s1 = s1[:2]
cluster.AddFilter(&PartitionFilter{
s1: s1,
s2: s2,
})
cluster.MustGet([]byte("k1"), []byte("v1"))
cluster.MustPut([]byte("k1"), []byte("changed"))
MustGetEqual(cluster.engines[s1[0]], []byte("k1"), []byte("v1"))
MustGetEqual(cluster.engines[s1[1]], []byte("k1"), []byte("v1"))
cluster.ClearFilters()
// when partition heals, old leader should sync data
cluster.MustPut([]byte("k2"), []byte("v2"))
MustGetEqual(cluster.engines[s1[0]], []byte("k2"), []byte("v2"))
MustGetEqual(cluster.engines[s1[0]], []byte("k1"), []byte("changed"))
}
func TestManyPartitionsOneClient2B(t *testing.T) {
// Test: partitions, one client (2B) ...
GenericTest(t, "2B", 1, false, false, true, -1, false, false)
}
func TestManyPartitionsManyClients2B(t *testing.T) {
// Test: partitions, many clients (2B) ...
GenericTest(t, "2B", 5, false, false, true, -1, false, false)
}
func TestPersistOneClient2B(t *testing.T) {
// Test: restarts, one client (2B) ...
GenericTest(t, "2B", 1, false, true, false, -1, false, false)
}
func TestPersistConcurrent2B(t *testing.T) {
// Test: restarts, many clients (2B) ...
GenericTest(t, "2B", 5, false, true, false, -1, false, false)
}
func TestPersistConcurrentUnreliable2B(t *testing.T) {
// Test: unreliable net, restarts, many clients (2B) ...
GenericTest(t, "2B", 5, true, true, false, -1, false, false)
}
func TestPersistPartition2B(t *testing.T) {
// Test: restarts, partitions, many clients (2B) ...
GenericTest(t, "2B", 5, false, true, true, -1, false, false)
}
func TestPersistPartitionUnreliable2B(t *testing.T) {
// Test: unreliable net, restarts, partitions, many clients (2B) ...
GenericTest(t, "2B", 5, true, true, true, -1, false, false)
}
func TestOneSnapshot2C(t *testing.T) {
cfg := config.NewTestConfig()
cfg.RaftLogGcCountLimit = 10
cluster := NewTestCluster(3, cfg)
cluster.Start()
defer cluster.Shutdown()
cf := engine_util.CfLock
cluster.MustPutCF(cf, []byte("k1"), []byte("v1"))
cluster.MustPutCF(cf, []byte("k2"), []byte("v2"))
MustGetCfEqual(cluster.engines[1], cf, []byte("k1"), []byte("v1"))
MustGetCfEqual(cluster.engines[1], cf, []byte("k2"), []byte("v2"))
for _, engine := range cluster.engines {
state, err := meta.GetApplyState(engine.Kv, 1)
if err != nil {
t.Fatal(err)
}
if state.TruncatedState.Index != meta.RaftInitLogIndex ||
state.TruncatedState.Term != meta.RaftInitLogTerm {
t.Fatalf("unexpected truncated state %v", state.TruncatedState)
}
}
cluster.AddFilter(
&PartitionFilter{
s1: []uint64{1},
s2: []uint64{2, 3},
},
)
// write some data to trigger snapshot
for i := 100; i < 115; i++ {
cluster.MustPutCF(cf, []byte(fmt.Sprintf("k%d", i)), []byte(fmt.Sprintf("v%d", i)))
}
cluster.MustDeleteCF(cf, []byte("k2"))
time.Sleep(500 * time.Millisecond)
MustGetCfNone(cluster.engines[1], cf, []byte("k100"))
cluster.ClearFilters()
	// Now the snapshot must be applied on store 1
MustGetCfEqual(cluster.engines[1], cf, []byte("k1"), []byte("v1"))
MustGetCfEqual(cluster.engines[1], cf, []byte("k100"), []byte("v100"))
MustGetCfNone(cluster.engines[1], cf, []byte("k2"))
cluster.StopServer(1)
cluster.StartServer(1)
MustGetCfEqual(cluster.engines[1], cf, []byte("k1"), []byte("v1"))
for _, engine := range cluster.engines {
state, err := meta.GetApplyState(engine.Kv, 1)
if err != nil {
t.Fatal(err)
}
truncatedIdx := state.TruncatedState.Index
appliedIdx := state.AppliedIndex
if appliedIdx-truncatedIdx > 2*uint64(cfg.RaftLogGcCountLimit) {
t.Fatalf("logs were not trimmed (%v - %v > 2*%v)", appliedIdx, truncatedIdx, cfg.RaftLogGcCountLimit)
}
}
}
func TestSnapshotRecover2C(t *testing.T) {
// Test: restarts, snapshots, one client (2C) ...
GenericTest(t, "2C", 1, false, true, false, 100, false, false)
}
func TestSnapshotRecoverManyClients2C(t *testing.T) {
// Test: restarts, snapshots, many clients (2C) ...
GenericTest(t, "2C", 20, false, true, false, 100, false, false)
}
func TestSnapshotUnreliable2C(t *testing.T) {
// Test: unreliable net, snapshots, many clients (2C) ...
GenericTest(t, "2C", 5, true, false, false, 100, false, false)
}
func TestSnapshotUnreliableRecover2C(t *testing.T) {
// Test: unreliable net, restarts, snapshots, many clients (2C) ...
GenericTest(t, "2C", 5, true, true, false, 100, false, false)
}
func TestSnapshotUnreliableRecoverConcurrentPartition2C(t *testing.T) {
// Test: unreliable net, restarts, partitions, snapshots, many clients (2C) ...
GenericTest(t, "2C", 5, true, true, true, 100, false, false)
}
func TestTransferLeader3B(t *testing.T) {
cfg := config.NewTestConfig()
cluster := NewTestCluster(5, cfg)
cluster.Start()
defer cluster.Shutdown()
regionID := cluster.GetRegion([]byte("")).GetId()
cluster.MustTransferLeader(regionID, NewPeer(1, 1))
cluster.MustTransferLeader(regionID, NewPeer(2, 2))
cluster.MustTransferLeader(regionID, NewPeer(3, 3))
cluster.MustTransferLeader(regionID, NewPeer(4, 4))
cluster.MustTransferLeader(regionID, NewPeer(5, 5))
}
func TestBasicConfChange3B(t *testing.T) {
cfg := config.NewTestConfig()
cluster := NewTestCluster(5, cfg)
cluster.Start()
defer cluster.Shutdown()
cluster.MustRemovePeer(1, NewPeer(2, 2))
cluster.MustRemovePeer(1, NewPeer(3, 3))
cluster.MustRemovePeer(1, NewPeer(4, 4))
cluster.MustRemovePeer(1, NewPeer(5, 5))
// now region 1 only has peer: (1, 1)
cluster.MustPut([]byte("k1"), []byte("v1"))
MustGetNone(cluster.engines[2], []byte("k1"))
// add peer (2, 2) to region 1
cluster.MustAddPeer(1, NewPeer(2, 2))
cluster.MustPut([]byte("k2"), []byte("v2"))
cluster.MustGet([]byte("k2"), []byte("v2"))
MustGetEqual(cluster.engines[2], []byte("k1"), []byte("v1"))
MustGetEqual(cluster.engines[2], []byte("k2"), []byte("v2"))
epoch := cluster.GetRegion([]byte("k1")).GetRegionEpoch()
assert.True(t, epoch.GetConfVer() > 1)
// peer 5 must not exist
MustGetNone(cluster.engines[5], []byte("k1"))
// add peer (3, 3) to region 1
cluster.MustAddPeer(1, NewPeer(3, 3))
cluster.MustRemovePeer(1, NewPeer(2, 2))
cluster.MustPut([]byte("k3"), []byte("v3"))
cluster.MustGet([]byte("k3"), []byte("v3"))
MustGetEqual(cluster.engines[3], []byte("k1"), []byte("v1"))
MustGetEqual(cluster.engines[3], []byte("k2"), []byte("v2"))
MustGetEqual(cluster.engines[3], []byte("k3"), []byte("v3"))
// peer 2 has nothing
MustGetNone(cluster.engines[2], []byte("k1"))
MustGetNone(cluster.engines[2], []byte("k2"))
cluster.MustAddPeer(1, NewPeer(2, 2))
MustGetEqual(cluster.engines[2], []byte("k1"), []byte("v1"))
MustGetEqual(cluster.engines[2], []byte("k2"), []byte("v2"))
MustGetEqual(cluster.engines[2], []byte("k3"), []byte("v3"))
// remove peer (2, 2) from region 1
cluster.MustRemovePeer(1, NewPeer(2, 2))
// add peer (2, 4) to region 1
cluster.MustAddPeer(1, NewPeer(2, 4))
// remove peer (3, 3) from region 1
cluster.MustRemovePeer(1, NewPeer(3, 3))
cluster.MustPut([]byte("k4"), []byte("v4"))
MustGetEqual(cluster.engines[2], []byte("k1"), []byte("v1"))
MustGetEqual(cluster.engines[2], []byte("k4"), []byte("v4"))
MustGetNone(cluster.engines[3], []byte("k1"))
MustGetNone(cluster.engines[3], []byte("k4"))
}
func TestConfChangeRecover3B(t *testing.T) {
// Test: restarts, snapshots, conf change, one client (3B) ...
GenericTest(t, "3B", 1, false, true, false, -1, true, false)
}
func TestConfChangeRecoverManyClients3B(t *testing.T) {
// Test: restarts, snapshots, conf change, many clients (3B) ...
GenericTest(t, "3B", 20, false, true, false, -1, true, false)
}
func TestConfChangeUnreliable3B(t *testing.T) {
// Test: unreliable net, snapshots, conf change, many clients (3B) ...
GenericTest(t, "3B", 5, true, false, false, -1, true, false)
}
func TestConfChangeUnreliableRecover3B(t *testing.T) {
// Test: unreliable net, restarts, snapshots, conf change, many clients (3B) ...
GenericTest(t, "3B", 5, true, true, false, -1, true, false)
}
func TestConfChangeSnapshotUnreliableRecover3B(t *testing.T) {
// Test: unreliable net, restarts, snapshots, conf change, many clients (3B) ...
GenericTest(t, "3B", 5, true, true, false, 100, true, false)
}
func TestConfChangeSnapshotUnreliableRecoverConcurrentPartition3B(t *testing.T) {
// Test: unreliable net, restarts, partitions, snapshots, conf change, many clients (3B) ...
GenericTest(t, "3B", 5, true, true, true, 100, true, false)
}
func TestOneSplit3B(t *testing.T) {
cfg := config.NewTestConfig()
cfg.RegionMaxSize = 800
cfg.RegionSplitSize = 500
cluster := NewTestCluster(5, cfg)
cluster.Start()
defer cluster.Shutdown()
cluster.MustPut([]byte("k1"), []byte("v1"))
cluster.MustPut([]byte("k2"), []byte("v2"))
region := cluster.GetRegion([]byte("k1"))
region1 := cluster.GetRegion([]byte("k2"))
assert.Equal(t, region.GetId(), region1.GetId())
cluster.AddFilter(
&PartitionFilter{
s1: []uint64{1, 2, 3, 4},
s2: []uint64{5},
},
)
// write some data to trigger split
for i := 100; i < 200; i++ {
cluster.MustPut([]byte(fmt.Sprintf("k%d", i)), []byte(fmt.Sprintf("v%d", i)))
}
time.Sleep(200 * time.Millisecond)
cluster.ClearFilters()
left := cluster.GetRegion([]byte("k1"))
right := cluster.GetRegion([]byte("k2"))
assert.NotEqual(t, left.GetId(), right.GetId())
assert.True(t, bytes.Equal(region.GetStartKey(), left.GetStartKey()))
assert.True(t, bytes.Equal(left.GetEndKey(), right.GetStartKey()))
assert.True(t, bytes.Equal(right.GetEndKey(), region.GetEndKey()))
req := NewRequest(left.GetId(), left.GetRegionEpoch(), []*raft_cmdpb.Request{NewGetCfCmd(engine_util.CfDefault, []byte("k2"))})
resp, _ := cluster.CallCommandOnLeader(&req, time.Second)
assert.NotNil(t, resp.GetHeader().GetError())
assert.NotNil(t, resp.GetHeader().GetError().GetKeyNotInRegion())
MustGetEqual(cluster.engines[5], []byte("k100"), []byte("v100"))
}
func TestSplitRecover3B(t *testing.T) {
	// Test: restarts, snapshots, splits, one client (3B) ...
GenericTest(t, "3B", 1, false, true, false, -1, false, true)
}
func TestSplitRecoverManyClients3B(t *testing.T) {
	// Test: restarts, snapshots, splits, many clients (3B) ...
GenericTest(t, "3B", 20, false, true, false, -1, false, true)
}
func TestSplitUnreliable3B(t *testing.T) {
	// Test: unreliable net, snapshots, splits, many clients (3B) ...
GenericTest(t, "3B", 5, true, false, false, -1, false, true)
}
func TestSplitUnreliableRecover3B(t *testing.T) {
	// Test: unreliable net, restarts, snapshots, splits, many clients (3B) ...
GenericTest(t, "3B", 5, true, true, false, -1, false, true)
}
func TestSplitConfChangeSnapshotUnreliableRecover3B(t *testing.T) {
	// Test: unreliable net, restarts, snapshots, conf change, splits, many clients (3B) ...
GenericTest(t, "3B", 5, true, true, false, 100, true, true)
}
func TestSplitConfChangeSnapshotUnreliableRecoverConcurrentPartition3B(t *testing.T) {
	// Test: unreliable net, restarts, partitions, snapshots, conf change, splits, many clients (3B) ...
GenericTest(t, "3B", 5, true, true, true, 100, true, true)
}

137
kv/test_raftstore/utils.go Normal file
View File

@ -0,0 +1,137 @@
package test_raftstore
import (
"bytes"
"encoding/hex"
"fmt"
"time"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/log"
"github.com/pingcap-incubator/tinykv/proto/pkg/metapb"
"github.com/pingcap-incubator/tinykv/proto/pkg/raft_cmdpb"
)
func SleepMS(ms int64) {
time.Sleep(time.Duration(ms) * time.Millisecond)
}
func NewPeer(storeID, peerID uint64) *metapb.Peer {
peer := &metapb.Peer{
StoreId: storeID,
Id: peerID,
}
return peer
}
func NewBaseRequest(regionID uint64, epoch *metapb.RegionEpoch) raft_cmdpb.RaftCmdRequest {
req := raft_cmdpb.RaftCmdRequest{}
req.Header = &raft_cmdpb.RaftRequestHeader{RegionId: regionID, RegionEpoch: epoch}
return req
}
func NewRequest(regionID uint64, epoch *metapb.RegionEpoch, requests []*raft_cmdpb.Request) raft_cmdpb.RaftCmdRequest {
req := NewBaseRequest(regionID, epoch)
req.Requests = requests
return req
}
func NewAdminRequest(regionID uint64, epoch *metapb.RegionEpoch, request *raft_cmdpb.AdminRequest) *raft_cmdpb.RaftCmdRequest {
req := NewBaseRequest(regionID, epoch)
req.AdminRequest = request
return &req
}
func NewPutCfCmd(cf string, key, value []byte) *raft_cmdpb.Request {
cmd := &raft_cmdpb.Request{}
cmd.CmdType = raft_cmdpb.CmdType_Put
cmd.Put = &raft_cmdpb.PutRequest{Key: key, Value: value, Cf: cf}
return cmd
}
func NewGetCfCmd(cf string, key []byte) *raft_cmdpb.Request {
get := &raft_cmdpb.GetRequest{
Cf: cf,
Key: key,
}
cmd := &raft_cmdpb.Request{
CmdType: raft_cmdpb.CmdType_Get,
Get: get,
}
return cmd
}
func NewDeleteCfCmd(cf string, key []byte) *raft_cmdpb.Request {
delete := &raft_cmdpb.DeleteRequest{
Cf: cf,
Key: key,
}
cmd := &raft_cmdpb.Request{
CmdType: raft_cmdpb.CmdType_Delete,
Delete: delete,
}
return cmd
}
func NewSnapCmd() *raft_cmdpb.Request {
cmd := &raft_cmdpb.Request{
CmdType: raft_cmdpb.CmdType_Snap,
Snap: &raft_cmdpb.SnapRequest{},
}
return cmd
}
func NewTransferLeaderCmd(peer *metapb.Peer) *raft_cmdpb.AdminRequest {
transferLeader := raft_cmdpb.TransferLeaderRequest{Peer: peer}
cmd := &raft_cmdpb.AdminRequest{
CmdType: raft_cmdpb.AdminCmdType_TransferLeader,
TransferLeader: &transferLeader,
}
return cmd
}
func MustGetCf(engine *engine_util.Engines, cf string, key []byte, value []byte) {
for i := 0; i < 300; i++ {
val, err := engine_util.GetCF(engine.Kv, cf, key)
if err == nil && (value == nil || bytes.Compare(val, value) == 0) {
return
}
SleepMS(20)
}
panic(fmt.Sprintf("can't get value %s for key %s", hex.EncodeToString(value), hex.EncodeToString(key)))
}
func MustGetCfEqual(engine *engine_util.Engines, cf string, key []byte, value []byte) {
MustGetCf(engine, cf, key, value)
}
func MustGetEqual(engine *engine_util.Engines, key []byte, value []byte) {
MustGetCf(engine, engine_util.CfDefault, key, value)
}
func MustGetCfNone(engine *engine_util.Engines, cf string, key []byte) {
var val []byte
var err error
for i := 0; i < 300; i++ {
val, err = engine_util.GetCF(engine.Kv, cf, key)
if err == badger.ErrKeyNotFound {
return
}
SleepMS(20)
}
panic(fmt.Sprintf("get value %s for key %s", hex.EncodeToString(val), hex.EncodeToString(key)))
}
func MustGetNone(engine *engine_util.Engines, key []byte) {
MustGetCfNone(engine, engine_util.CfDefault, key)
}
func NewTestCluster(count int, cfg *config.Config) *Cluster {
log.SetLevelByString(cfg.LogLevel)
log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds | log.Lshortfile)
schedulerClient := NewMockSchedulerClient(0, uint64(count)+1)
simulator := NewNodeSimulator(schedulerClient)
return NewCluster(count, schedulerClient, simulator, cfg)
}

View File

@ -0,0 +1,596 @@
package transaction
import (
"testing"
"github.com/pingcap-incubator/tinykv/kv/transaction/mvcc"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
"github.com/stretchr/testify/assert"
)
// TestGetValue4B tests that getting a value works in the simple case.
func TestGetValue4B(t *testing.T) {
builder := newBuilder(t)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{99}, ts: 50, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 54, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 50}},
})
var req kvrpcpb.GetRequest
req.Key = []byte{99}
req.Version = mvcc.TsMax
resp := builder.runOneRequest(&req).(*kvrpcpb.GetResponse)
assert.Nil(t, resp.RegionError)
assert.Nil(t, resp.Error)
assert.Equal(t, []byte{42}, resp.Value)
}
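
In these fixtures the CfWrite key is written at the commit timestamp (54 above), while its value packs a one-byte write kind followed by the 8-byte big-endian start timestamp of the transaction, so {1, 0, 0, 0, 0, 0, 0, 0, 50} is a put whose data lives in CfDefault at ts 50. A small decoding sketch assuming only that layout; the kind constants are inferred from the test data, and the authoritative encoding lives in kv/transaction/mvcc:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// Write kinds as they appear in the CfWrite fixtures in this file:
// 1 = put, 2 = delete, 3 = rollback (inferred, not the repo's constants).
const (
	kindPut      = 1
	kindDelete   = 2
	kindRollback = 3
)

// decodeWrite splits a CfWrite value into its kind byte and the big-endian
// start timestamp that follows it.
func decodeWrite(val []byte) (kind byte, startTs uint64) {
	return val[0], binary.BigEndian.Uint64(val[1:9])
}

func main() {
	kind, ts := decodeWrite([]byte{1, 0, 0, 0, 0, 0, 0, 0, 50})
	fmt.Println(kind == kindPut, ts) // true 50
}
```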
// TestGetValueTs4B tests that getting a value works with different timestamps.
func TestGetValueTs4B(t *testing.T) {
builder := newBuilder(t)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{99}, ts: 50, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 54, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 50}},
})
var req0 kvrpcpb.GetRequest
req0.Key = []byte{99}
req0.Version = 100
var req1 kvrpcpb.GetRequest
req1.Key = []byte{99}
req1.Version = 100
var req2 kvrpcpb.GetRequest
req2.Key = []byte{99}
req2.Version = 100
resps := builder.runRequests(&req0, &req1, &req2)
resp0 := resps[0].(*kvrpcpb.GetResponse)
resp1 := resps[1].(*kvrpcpb.GetResponse)
resp2 := resps[2].(*kvrpcpb.GetResponse)
assert.Nil(t, resp0.RegionError)
assert.Nil(t, resp0.Error)
assert.Equal(t, []byte{42}, resp0.Value)
assert.Nil(t, resp1.RegionError)
assert.Nil(t, resp1.Error)
assert.Equal(t, []byte{42}, resp1.Value)
assert.Nil(t, resp2.RegionError)
assert.Nil(t, resp2.Error)
assert.Equal(t, []byte{42}, resp2.Value)
}
// TestGetEmpty4B tests that a get on an empty DB returns nothing.
func TestGetEmpty4B(t *testing.T) {
builder := newBuilder(t)
var req kvrpcpb.GetRequest
req.Key = []byte{100}
req.Version = mvcc.TsMax
resp := builder.runOneRequest(&req).(*kvrpcpb.GetResponse)
assert.Nil(t, resp.RegionError)
assert.Nil(t, resp.Error)
assert.Equal(t, []byte(nil), resp.Value)
}
// TestGetNone4B tests that getting a missing key works.
func TestGetNone4B(t *testing.T) {
builder := newBuilder(t)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{99}, ts: 50, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 54, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 50}},
{cf: engine_util.CfDefault, key: []byte{101}, ts: 50, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{101}, ts: 54, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 50}},
})
var req kvrpcpb.GetRequest
req.Key = []byte{100}
req.Version = mvcc.TsMax
resp := builder.runOneRequest(&req).(*kvrpcpb.GetResponse)
assert.Nil(t, resp.RegionError)
assert.Nil(t, resp.Error)
assert.Equal(t, []byte(nil), resp.Value)
}
// TestGetVersions4B tests that we get the correct value when there are multiple versions.
func TestGetVersions4B(t *testing.T) {
builder := newBuilder(t)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{99}, ts: 50, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 54, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 50}},
{cf: engine_util.CfDefault, key: []byte{99}, ts: 60, value: []byte{43}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 66, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 60}},
{cf: engine_util.CfDefault, key: []byte{99}, ts: 120, value: []byte{44}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 122, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 120}},
})
var req0 kvrpcpb.GetRequest
req0.Key = []byte{99}
req0.Version = 40
var req1 kvrpcpb.GetRequest
req1.Key = []byte{99}
req1.Version = 56
var req2 kvrpcpb.GetRequest
req2.Key = []byte{99}
req2.Version = 60
var req3 kvrpcpb.GetRequest
req3.Key = []byte{99}
req3.Version = 65
var req4 kvrpcpb.GetRequest
req4.Key = []byte{99}
req4.Version = 66
var req5 kvrpcpb.GetRequest
req5.Key = []byte{99}
req5.Version = 100
resps := builder.runRequests(&req0, &req1, &req2, &req3, &req4, &req5)
resp0 := resps[0].(*kvrpcpb.GetResponse)
resp1 := resps[1].(*kvrpcpb.GetResponse)
resp2 := resps[2].(*kvrpcpb.GetResponse)
resp3 := resps[3].(*kvrpcpb.GetResponse)
resp4 := resps[4].(*kvrpcpb.GetResponse)
resp5 := resps[5].(*kvrpcpb.GetResponse)
assert.Nil(t, resp0.RegionError)
assert.Nil(t, resp0.Error)
assert.Equal(t, []byte(nil), resp0.Value)
assert.Nil(t, resp1.RegionError)
assert.Nil(t, resp1.Error)
assert.Equal(t, []byte{42}, resp1.Value)
assert.Nil(t, resp2.RegionError)
assert.Nil(t, resp2.Error)
assert.Equal(t, []byte{42}, resp2.Value)
assert.Nil(t, resp3.RegionError)
assert.Nil(t, resp3.Error)
assert.Equal(t, []byte{42}, resp3.Value)
assert.Nil(t, resp4.RegionError)
assert.Nil(t, resp4.Error)
assert.Equal(t, []byte{43}, resp4.Value)
assert.Nil(t, resp5.RegionError)
assert.Nil(t, resp5.Error)
assert.Equal(t, []byte{43}, resp5.Value)
}
// TestGetDeleted4B tests that we get the correct value when there are multiple versions, including a deletion.
func TestGetDeleted4B(t *testing.T) {
builder := newBuilder(t)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{99}, ts: 50, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 54, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 50}},
{cf: engine_util.CfDefault, key: []byte{99}, ts: 60, value: nil},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 66, value: []byte{2, 0, 0, 0, 0, 0, 0, 0, 60}},
{cf: engine_util.CfDefault, key: []byte{99}, ts: 120, value: []byte{44}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 122, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 120}},
})
var req0 kvrpcpb.GetRequest
req0.Key = []byte{99}
req0.Version = 54
var req1 kvrpcpb.GetRequest
req1.Key = []byte{99}
req1.Version = 60
var req2 kvrpcpb.GetRequest
req2.Key = []byte{99}
req2.Version = 65
var req3 kvrpcpb.GetRequest
req3.Key = []byte{99}
req3.Version = 66
var req4 kvrpcpb.GetRequest
req4.Key = []byte{99}
req4.Version = 67
var req5 kvrpcpb.GetRequest
req5.Key = []byte{99}
req5.Version = 122
resps := builder.runRequests(&req0, &req1, &req2, &req3, &req4, &req5)
resp0 := resps[0].(*kvrpcpb.GetResponse)
resp1 := resps[1].(*kvrpcpb.GetResponse)
resp2 := resps[2].(*kvrpcpb.GetResponse)
resp3 := resps[3].(*kvrpcpb.GetResponse)
resp4 := resps[4].(*kvrpcpb.GetResponse)
resp5 := resps[5].(*kvrpcpb.GetResponse)
assert.Nil(t, resp0.RegionError)
assert.Nil(t, resp0.Error)
assert.Equal(t, []byte{42}, resp0.Value)
assert.Nil(t, resp1.RegionError)
assert.Nil(t, resp1.Error)
assert.Equal(t, []byte{42}, resp1.Value)
assert.Nil(t, resp2.RegionError)
assert.Nil(t, resp2.Error)
assert.Equal(t, []byte{42}, resp2.Value)
assert.Nil(t, resp3.RegionError)
assert.Nil(t, resp3.Error)
assert.Equal(t, []byte(nil), resp3.Value)
assert.Nil(t, resp4.RegionError)
assert.Nil(t, resp4.Error)
assert.Equal(t, []byte(nil), resp4.Value)
assert.Nil(t, resp5.RegionError)
assert.Nil(t, resp5.Error)
assert.Equal(t, []byte{44}, resp5.Value)
}
// TestGetLocked4B tests getting a value when it is locked by another transaction.
func TestGetLocked4B(t *testing.T) {
builder := newBuilder(t)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{99}, ts: 50, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{99}, ts: 54, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 50}},
{cf: engine_util.CfLock, key: []byte{99}, value: []byte{99, 1, 0, 0, 0, 0, 0, 0, 0, 200, 0, 0, 0, 0, 0, 0, 0, 0}},
})
var req0 kvrpcpb.GetRequest
req0.Key = []byte{99}
req0.Version = 55
var req1 kvrpcpb.GetRequest
req1.Key = []byte{99}
req1.Version = 300
resps := builder.runRequests(&req0, &req1)
resp0 := resps[0].(*kvrpcpb.GetResponse)
resp1 := resps[1].(*kvrpcpb.GetResponse)
assert.Nil(t, resp0.RegionError)
assert.Nil(t, resp0.Error)
assert.Equal(t, []byte{42}, resp0.Value)
assert.Nil(t, resp1.RegionError)
lockInfo := resp1.Error.Locked
assert.Equal(t, []byte{99}, lockInfo.Key)
assert.Equal(t, []byte{99}, lockInfo.PrimaryLock)
assert.Equal(t, uint64(200), lockInfo.LockVersion)
}
// TestEmptyPrewrite4B tests that a Prewrite with no mutations succeeds and changes nothing.
func TestEmptyPrewrite4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.prewriteRequest()
resp := builder.runOneRequest(cmd).(*kvrpcpb.PrewriteResponse)
assert.Empty(t, resp.Errors)
assert.Nil(t, resp.RegionError)
builder.assertLen(engine_util.CfDefault, 0)
}
// TestSinglePrewrite4B tests a prewrite with a single write; it should succeed, and we check all the expected values.
func TestSinglePrewrite4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.prewriteRequest(mutation(3, []byte{42}, kvrpcpb.Op_Put))
cmd.LockTtl = 1000
resp := builder.runOneRequest(cmd).(*kvrpcpb.PrewriteResponse)
assert.Empty(t, resp.Errors)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 1, 0)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 3, 232}},
})
}
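
The CfLock value asserted above can be read in a similar way: the primary key bytes come first, then what appears to be a one-byte lock kind, an 8-byte big-endian start ts, and an 8-byte big-endian TTL (here primary {1}, ts 100, and ttl 1000 = {3, 232}, matching LockTtl above). A decoding sketch under that assumption, parsing backward from the fixed 17-byte tail; `decodeLock` is an illustrative name and the real parser in kv/transaction/mvcc may differ:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// decodeLock splits a CfLock value into (primary key, kind, start ts, ttl),
// working backward from the 17-byte tail seen in the fixtures above.
func decodeLock(val []byte) (primary []byte, kind byte, ts, ttl uint64) {
	n := len(val)
	return val[:n-17], val[n-17],
		binary.BigEndian.Uint64(val[n-16 : n-8]),
		binary.BigEndian.Uint64(val[n-8:])
}

func main() {
	lock := []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 3, 232}
	primary, kind, ts, ttl := decodeLock(lock)
	fmt.Println(primary, kind, ts, ttl) // [1] 1 100 1000
}
```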
// TestPrewriteLocked4B tests that two prewrites to the same key causes a lock error.
func TestPrewriteLocked4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.prewriteRequest(mutation(3, []byte{42}, kvrpcpb.Op_Put))
cmd2 := builder.prewriteRequest(mutation(3, []byte{53}, kvrpcpb.Op_Put))
resps := builder.runRequests(cmd, cmd2)
assert.Empty(t, resps[0].(*kvrpcpb.PrewriteResponse).Errors)
assert.Nil(t, resps[0].(*kvrpcpb.PrewriteResponse).RegionError)
assert.Equal(t, len(resps[1].(*kvrpcpb.PrewriteResponse).Errors), 1)
assert.Nil(t, resps[1].(*kvrpcpb.PrewriteResponse).RegionError)
builder.assertLens(1, 1, 0)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0}},
})
}
// TestPrewriteWritten4B tests an attempted prewrite with a write conflict.
func TestPrewriteWritten4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.prewriteRequest(mutation(3, []byte{42}, kvrpcpb.Op_Put))
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 80, value: []byte{5}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: 101, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.PrewriteResponse)
assert.Equal(t, 1, len(resp.Errors))
assert.NotNil(t, resp.Errors[0].Conflict)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 80, value: []byte{5}},
})
}
// TestPrewriteWrittenNoConflict4B tests an attempted prewrite with a write already present, but no conflict.
func TestPrewriteWrittenNoConflict4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.prewriteRequest(mutation(3, []byte{42}, kvrpcpb.Op_Put))
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 80, value: []byte{5}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: 90, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.PrewriteResponse)
assert.Empty(t, resp.Errors)
assert.Nil(t, resp.RegionError)
builder.assertLens(2, 1, 1)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{5}, ts: 80},
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
})
}
// TestMultiplePrewrites4B tests that multiple prewrites to different keys succeeds.
func TestMultiplePrewrites4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.prewriteRequest(mutation(3, []byte{42}, kvrpcpb.Op_Put))
cmd2 := builder.prewriteRequest(mutation(4, []byte{53}, kvrpcpb.Op_Put))
resps := builder.runRequests(cmd, cmd2)
assert.Empty(t, resps[0].(*kvrpcpb.PrewriteResponse).Errors)
assert.Nil(t, resps[0].(*kvrpcpb.PrewriteResponse).RegionError)
assert.Empty(t, resps[1].(*kvrpcpb.PrewriteResponse).Errors)
assert.Nil(t, resps[1].(*kvrpcpb.PrewriteResponse).RegionError)
builder.assertLens(2, 2, 0)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{4}, ts: 101, value: []byte{53}},
{cf: engine_util.CfLock, key: []byte{4}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, 101, 0, 0, 0, 0, 0, 0, 0, 0}},
})
}
// TestPrewriteOverwrite4B tests that two writes in the same prewrite succeed and we see the second write.
func TestPrewriteOverwrite4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.prewriteRequest(mutation(3, []byte{42}, kvrpcpb.Op_Put), mutation(3, []byte{45}, kvrpcpb.Op_Put))
resp := builder.runOneRequest(cmd).(*kvrpcpb.PrewriteResponse)
assert.Empty(t, resp.Errors)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 1, 0)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{45}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
})
}
// TestPrewriteMultiple4B tests that a prewrite with multiple mutations succeeds.
func TestPrewriteMultiple4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.prewriteRequest(
mutation(3, []byte{42}, kvrpcpb.Op_Put),
mutation(4, []byte{43}, kvrpcpb.Op_Put),
mutation(5, []byte{44}, kvrpcpb.Op_Put),
mutation(4, nil, kvrpcpb.Op_Del),
mutation(4, []byte{1, 3, 5}, kvrpcpb.Op_Put),
mutation(255, []byte{45}, kvrpcpb.Op_Put),
)
resp := builder.runOneRequest(cmd).(*kvrpcpb.PrewriteResponse)
assert.Empty(t, resp.Errors)
assert.Nil(t, resp.RegionError)
builder.assertLens(4, 4, 0)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{4}, value: []byte{1, 3, 5}},
})
}
// TestEmptyCommit4B tests a commit request with no keys to commit.
func TestEmptyCommit4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([][]byte{}...)
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(0, 0, 0)
}
// TestSingleCommit4B tests committing a single key.
func TestSingleCommit4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
{cf: engine_util.CfDefault, key: []byte{3}},
})
}
// TestCommitOverwrite4B tests committing where there is already a write.
func TestCommitOverwrite4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([]byte{3})
builder.init([]kv{
// A previous, committed write.
{cf: engine_util.CfDefault, key: []byte{3}, ts: 80, value: []byte{15}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: 84, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
// The current, pre-written write.
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(2, 0, 2)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
{cf: engine_util.CfDefault, key: []byte{3}},
})
}
// TestCommitMultipleKeys4B tests committing multiple keys in the same commit. Also puts some other data in the DB and tests
// that it is unchanged.
func TestCommitMultipleKeys4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([]byte{3}, []byte{12, 4, 0}, []byte{15})
builder.init([]kv{
// Current, pre-written.
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{12, 4, 0}, value: []byte{1, 1, 0, 0, 1, 5}},
{cf: engine_util.CfLock, key: []byte{12, 4, 0}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{15}, value: []byte{0}},
{cf: engine_util.CfLock, key: []byte{15}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
// Some committed data.
{cf: engine_util.CfDefault, key: []byte{4}, ts: 80, value: []byte{15}},
{cf: engine_util.CfWrite, key: []byte{4}, ts: 84, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{cf: engine_util.CfDefault, key: []byte{3, 0}, ts: 80, value: []byte{150}},
{cf: engine_util.CfWrite, key: []byte{3, 0}, ts: 84, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
// Another pre-written transaction.
{cf: engine_util.CfDefault, key: []byte{2}, ts: 99, value: []byte{0, 0, 0, 8}},
{cf: engine_util.CfLock, key: []byte{2}, value: []byte{1, 2, 0, 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{43, 6}, ts: 99, value: []byte{1, 1, 0, 0, 1, 5}},
{cf: engine_util.CfLock, key: []byte{43, 6}, value: []byte{1, 2, 0, 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(7, 2, 5)
builder.assert([]kv{
// The newly committed data.
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
{cf: engine_util.CfWrite, key: []byte{12, 4, 0}, ts: 110, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
{cf: engine_util.CfWrite, key: []byte{15}, ts: 110, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
// Committed data is untouched.
{cf: engine_util.CfDefault, key: []byte{4}, ts: 80},
{cf: engine_util.CfWrite, key: []byte{4}, ts: 84},
{cf: engine_util.CfDefault, key: []byte{3, 0}, ts: 80},
{cf: engine_util.CfWrite, key: []byte{3, 0}, ts: 84},
// Pre-written data is untouched.
{cf: engine_util.CfDefault, key: []byte{2}, ts: 99},
{cf: engine_util.CfLock, key: []byte{2}},
{cf: engine_util.CfDefault, key: []byte{43, 6}, ts: 99},
{cf: engine_util.CfLock, key: []byte{43, 6}},
})
}
// TestRecommitKey4B tests committing the same key multiple times in one commit.
func TestRecommitKey4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([]byte{3}, []byte{3})
builder.init([]kv{
// The current, pre-written write.
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
{cf: engine_util.CfDefault, key: []byte{3}},
})
}
// TestCommitConflictRollback4B tests committing a rolled back transaction.
func TestCommitConflictRollback4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(0, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110},
})
}
// TestCommitConflictRace4B tests committing where a key is pre-written by a different transaction.
func TestCommitConflictRace4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 90, value: []byte{110}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 3, 0, 0, 0, 0, 0, 0, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.NotNil(t, resp.Error.Retryable)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 1, 0)
builder.assert([]kv{
{cf: engine_util.CfLock, key: []byte{3}},
{cf: engine_util.CfDefault, key: []byte{3}, ts: 90},
})
}
// TestCommitConflictRepeat4B tests recommitting a transaction (i.e., the same commit request is received twice).
func TestCommitConflictRepeat4B(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110},
{cf: engine_util.CfDefault, key: []byte{3}},
})
}
// TestCommitMissingPrewrite4a tests committing a transaction which was not prewritten (i.e., the prewrite request was lost, but
// the commit request was not).
func TestCommitMissingPrewrite4a(t *testing.T) {
builder := newBuilder(t)
cmd := builder.commitRequest([]byte{3})
builder.init([]kv{
// Some committed data.
{cf: engine_util.CfDefault, key: []byte{4}, ts: 80, value: []byte{15}},
{cf: engine_util.CfWrite, key: []byte{4}, ts: 84, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{cf: engine_util.CfDefault, key: []byte{3, 0}, ts: 80, value: []byte{150}},
{cf: engine_util.CfWrite, key: []byte{3, 0}, ts: 84, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
// Note no prewrite.
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CommitResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(2, 0, 2)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{4}, ts: 80},
{cf: engine_util.CfWrite, key: []byte{4}, ts: 84},
{cf: engine_util.CfDefault, key: []byte{3, 0}, ts: 80},
{cf: engine_util.CfWrite, key: []byte{3, 0}, ts: 84},
})
}

View File

@ -0,0 +1,484 @@
package transaction
import (
"encoding/binary"
"testing"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
"github.com/stretchr/testify/assert"
)
// TestEmptyRollback4C tests a rollback with no keys.
func TestEmptyRollback4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.rollbackRequest([][]byte{}...)
resp := builder.runOneRequest(cmd).(*kvrpcpb.BatchRollbackResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(0, 0, 0)
}
// TestRollback4C tests a successful rollback.
func TestRollback4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.rollbackRequest([]byte{3})
builder.init([]kv{
// See TestSinglePrewrite.
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.BatchRollbackResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(0, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
})
}
// TestRollbackDuplicateKeys4C tests a rollback which rolls back multiple keys, including one duplicated key.
func TestRollbackDuplicateKeys4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.rollbackRequest([]byte{3}, []byte{15}, []byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{15}, value: []byte{0}},
{cf: engine_util.CfLock, key: []byte{15}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.BatchRollbackResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(0, 0, 2)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
{cf: engine_util.CfWrite, key: []byte{15}, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
})
}
// TestRollbackMissingPrewrite4C tests trying to roll back a missing prewrite.
func TestRollbackMissingPrewrite4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.rollbackRequest([]byte{3})
resp := builder.runOneRequest(cmd).(*kvrpcpb.BatchRollbackResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(0, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
})
}
// TestRollbackCommitted4C tests trying to roll back a transaction which is already committed.
func TestRollbackCommitted4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.rollbackRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.BatchRollbackResponse)
assert.NotNil(t, resp.Error.Abort)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: 110},
})
}
// TestRollbackDuplicate4C tests trying to roll back a transaction which has already been rolled back.
func TestRollbackDuplicate4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.rollbackRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 100, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.BatchRollbackResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(0, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 100},
})
}
// TestRollbackOtherTxn4C tests trying to roll back the wrong transaction.
func TestRollbackOtherTxn4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.rollbackRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 80, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 1, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.BatchRollbackResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 1, 1)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 80},
{cf: engine_util.CfLock, key: []byte{3}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: 100, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, builder.ts()}},
})
}
// TestCheckTxnStatusTtlExpired4C checks that if there is a lock and its ttl has expired, then it is rolled back.
func TestCheckTxnStatusTtlExpired4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.checkTxnStatusRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: cmd.LockTs, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{3, 1, 0, 0, 5, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 8}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CheckTxnStatusResponse)
assert.Nil(t, resp.RegionError)
assert.Equal(t, kvrpcpb.Action_TTLExpireRollback, resp.Action)
builder.assertLens(0, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: cmd.LockTs, value: []byte{3, 0, 0, 5, 0, 0, 0, 0, builder.ts()}},
})
}
// TestCheckTxnStatusTtlNotExpired4C checks that if there is a lock and its ttl has not expired, then nothing changes.
func TestCheckTxnStatusTtlNotExpired4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.checkTxnStatusRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: cmd.LockTs, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{3, 1, 0, 0, 5, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 1, 0, 0, 0, 8}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CheckTxnStatusResponse)
assert.Nil(t, resp.RegionError)
assert.Equal(t, kvrpcpb.Action_NoAction, resp.Action)
builder.assertLens(1, 1, 0)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: cmd.LockTs},
{cf: engine_util.CfLock, key: []byte{3}},
})
}
// TestCheckTxnStatusRolledBack4C tests checking a key which has already been rolled back.
func TestCheckTxnStatusRolledBack4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.checkTxnStatusRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: cmd.LockTs, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: cmd.LockTs, value: []byte{3, 0, 0, 5, 0, 0, 0, 0, builder.ts()}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{3, 1, 0, 0, 8, 0, 0, 0, 0, builder.ts(), 0, 0, 0, 0, 0, 0, 0, 8}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CheckTxnStatusResponse)
assert.Nil(t, resp.RegionError)
assert.Equal(t, kvrpcpb.Action_NoAction, resp.Action)
assert.Equal(t, uint64(0), resp.CommitVersion)
builder.assertLens(1, 1, 1)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: cmd.LockTs},
{cf: engine_util.CfWrite, key: []byte{3}, ts: cmd.LockTs},
{cf: engine_util.CfLock, key: []byte{3}},
})
}
// TestCheckTxnStatusCommitted4C tests checking a key which has already been committed.
func TestCheckTxnStatusCommitted4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.checkTxnStatusRequest([]byte{3})
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: cmd.LockTs, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{3}, ts: cmd.LockTs, value: []byte{1, 0, 0, 5, 0, 0, 0, 0, builder.ts()}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CheckTxnStatusResponse)
assert.Nil(t, resp.RegionError)
assert.Equal(t, kvrpcpb.Action_NoAction, resp.Action)
assert.Equal(t, binary.BigEndian.Uint64([]byte{0, 0, 5, 0, 0, 0, 0, builder.ts()}), resp.CommitVersion)
builder.assertLens(1, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: cmd.LockTs},
{cf: engine_util.CfWrite, key: []byte{3}, ts: cmd.LockTs},
})
}
// TestCheckTxnStatusNoLockNoWrite4C checks that if there is no data for the key, we get the right response.
func TestCheckTxnStatusNoLockNoWrite4C(t *testing.T) {
builder := newBuilder(t)
cmd := builder.checkTxnStatusRequest([]byte{3})
resp := builder.runOneRequest(cmd).(*kvrpcpb.CheckTxnStatusResponse)
assert.Nil(t, resp.RegionError)
assert.Equal(t, kvrpcpb.Action_LockNotExistRollback, resp.Action)
builder.assertLens(0, 0, 1)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: cmd.LockTs, value: []byte{3, 0, 0, 5, 0, 0, 0, 0, builder.ts()}},
})
}
// TestEmptyResolve4C tests a completely empty resolve request.
func TestEmptyResolve4C(t *testing.T) {
builder := newBuilder(t)
cmd := resolveRequest(0, 0)
resp := builder.runOneRequest(cmd).(*kvrpcpb.ResolveLockResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(0, 0, 0)
}
// TestResolveCommit4C should commit all keys in the specified transaction.
func TestResolveCommit4C(t *testing.T) {
builder := newBuilder(t)
cmd := resolveRequest(100, 120)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{7}, ts: 100, value: []byte{43}},
{cf: engine_util.CfLock, key: []byte{7}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{200}, ts: 110, value: []byte{44}},
{cf: engine_util.CfLock, key: []byte{200}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.ResolveLockResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(3, 1, 2)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100},
{cf: engine_util.CfWrite, key: []byte{3}, ts: 120, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 100}},
{cf: engine_util.CfDefault, key: []byte{7}, ts: 100},
{cf: engine_util.CfWrite, key: []byte{7}, ts: 120, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 100}},
{cf: engine_util.CfDefault, key: []byte{200}, ts: 110},
{cf: engine_util.CfLock, key: []byte{200}},
})
}
// TestResolveRollback4C should roll back all keys in the specified transaction.
func TestResolveRollback4C(t *testing.T) {
builder := newBuilder(t)
cmd := resolveRequest(100, 0)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100, value: []byte{42}},
{cf: engine_util.CfLock, key: []byte{3}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{7}, ts: 100, value: []byte{43}},
{cf: engine_util.CfLock, key: []byte{7}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0}},
{cf: engine_util.CfDefault, key: []byte{200}, ts: 110, value: []byte{44}},
{cf: engine_util.CfLock, key: []byte{200}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.ResolveLockResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(1, 1, 2)
builder.assert([]kv{
{cf: engine_util.CfWrite, key: []byte{3}, ts: 100, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, 100}},
{cf: engine_util.CfWrite, key: []byte{7}, ts: 100, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, 100}},
{cf: engine_util.CfDefault, key: []byte{200}, ts: 110},
{cf: engine_util.CfLock, key: []byte{200}},
})
}
// TestResolveCommitWritten4C tests a resolve where the matched keys are already committed or rolled back.
func TestResolveCommitWritten4C(t *testing.T) {
builder := newBuilder(t)
cmd := resolveRequest(100, 120)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{201}, ts: 120, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 100}},
{cf: engine_util.CfDefault, key: []byte{7}, ts: 100, value: []byte{43}},
{cf: engine_util.CfWrite, key: []byte{201}, ts: 100, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, 100}},
{cf: engine_util.CfDefault, key: []byte{200}, ts: 110, value: []byte{44}},
{cf: engine_util.CfLock, key: []byte{200}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.ResolveLockResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(3, 1, 2)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100},
{cf: engine_util.CfWrite, key: []byte{201}, ts: 120},
{cf: engine_util.CfDefault, key: []byte{7}, ts: 100},
{cf: engine_util.CfWrite, key: []byte{201}, ts: 100},
{cf: engine_util.CfDefault, key: []byte{200}, ts: 110},
{cf: engine_util.CfLock, key: []byte{200}},
})
}
// TestResolveRollbackWritten4C tests a rollback resolve where data has already been committed or rolled back.
func TestResolveRollbackWritten4C(t *testing.T) {
builder := newBuilder(t)
cmd := resolveRequest(100, 0)
builder.init([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100, value: []byte{42}},
{cf: engine_util.CfWrite, key: []byte{201}, ts: 120, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 100}},
{cf: engine_util.CfDefault, key: []byte{7}, ts: 100, value: []byte{43}},
{cf: engine_util.CfWrite, key: []byte{201}, ts: 100, value: []byte{3, 0, 0, 0, 0, 0, 0, 0, 100}},
{cf: engine_util.CfDefault, key: []byte{200}, ts: 110, value: []byte{44}},
{cf: engine_util.CfLock, key: []byte{200}, value: []byte{1, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 0}},
})
resp := builder.runOneRequest(cmd).(*kvrpcpb.ResolveLockResponse)
assert.Nil(t, resp.Error)
assert.Nil(t, resp.RegionError)
builder.assertLens(3, 1, 2)
builder.assert([]kv{
{cf: engine_util.CfDefault, key: []byte{3}, ts: 100},
{cf: engine_util.CfWrite, key: []byte{201}, ts: 120},
{cf: engine_util.CfDefault, key: []byte{7}, ts: 100},
{cf: engine_util.CfWrite, key: []byte{201}, ts: 100},
{cf: engine_util.CfDefault, key: []byte{200}, ts: 110},
{cf: engine_util.CfLock, key: []byte{200}},
})
}
// TestScanEmpty4C tests a scan after the end of the DB.
func TestScanEmpty4C(t *testing.T) {
builder := builderForScan(t)
cmd := builder.scanRequest([]byte{200}, 10000)
resp := builder.runOneRequest(cmd).(*kvrpcpb.ScanResponse)
assert.Nil(t, resp.RegionError)
assert.Empty(t, resp.Pairs)
}
// TestScanLimitZero4C tests we get nothing if limit is 0.
func TestScanLimitZero4C(t *testing.T) {
builder := builderForScan(t)
cmd := builder.scanRequest([]byte{3}, 0)
resp := builder.runOneRequest(cmd).(*kvrpcpb.ScanResponse)
assert.Nil(t, resp.RegionError)
assert.Empty(t, resp.Pairs)
}
// TestScanAll4C starts at the beginning of the DB, reads all pairs, and respects the timestamp.
func TestScanAll4C(t *testing.T) {
builder := builderForScan(t)
cmd := builder.scanRequest([]byte{0}, 10000)
resp := builder.runOneRequest(cmd).(*kvrpcpb.ScanResponse)
assert.Nil(t, resp.RegionError)
assert.Equal(t, 11, len(resp.Pairs))
assert.Equal(t, []byte{1}, resp.Pairs[0].Key)
assert.Equal(t, []byte{50}, resp.Pairs[0].Value)
assert.Equal(t, []byte{199}, resp.Pairs[10].Key)
assert.Equal(t, []byte{54}, resp.Pairs[10].Value)
}
// TestScanLimit4C tests that scan takes the limit into account.
func TestScanLimit4C(t *testing.T) {
builder := builderForScan(t)
cmd := builder.scanRequest([]byte{2}, 6)
resp := builder.runOneRequest(cmd).(*kvrpcpb.ScanResponse)
assert.Nil(t, resp.RegionError)
assert.Equal(t, 6, len(resp.Pairs))
assert.Equal(t, []byte{3}, resp.Pairs[0].Key)
assert.Equal(t, []byte{51}, resp.Pairs[0].Value)
assert.Equal(t, []byte{4}, resp.Pairs[5].Key)
assert.Equal(t, []byte{52}, resp.Pairs[5].Value)
}
// TestScanDeleted4C scans over a value which is deleted and then replaced.
func TestScanDeleted4C(t *testing.T) {
builder := builderForScan(t)
req1 := builder.scanRequest([]byte{100}, 10000)
req1.Version = 100
req2 := builder.scanRequest([]byte{100}, 10000)
req2.Version = 105
req3 := builder.scanRequest([]byte{100}, 10000)
req3.Version = 120
resps := builder.runRequests(req1, req2, req3)
resp1 := resps[0].(*kvrpcpb.ScanResponse)
assert.Nil(t, resp1.RegionError)
assert.Equal(t, 3, len(resp1.Pairs))
assert.Equal(t, []byte{150}, resp1.Pairs[1].Key)
assert.Equal(t, []byte{42}, resp1.Pairs[1].Value)
resp2 := resps[1].(*kvrpcpb.ScanResponse)
assert.Nil(t, resp2.RegionError)
assert.Equal(t, 2, len(resp2.Pairs))
assert.Equal(t, []byte{120}, resp2.Pairs[0].Key)
assert.Equal(t, []byte{199}, resp2.Pairs[1].Key)
resp3 := resps[2].(*kvrpcpb.ScanResponse)
assert.Nil(t, resp3.RegionError)
assert.Equal(t, 3, len(resp3.Pairs))
assert.Equal(t, []byte{150}, resp3.Pairs[1].Key)
assert.Equal(t, []byte{64}, resp3.Pairs[1].Value)
}
func builderForScan(t *testing.T) *testBuilder {
values := []kv{
// Committed before 100.
{engine_util.CfDefault, []byte{1}, 80, []byte{50}},
{engine_util.CfWrite, []byte{1}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{1, 23}, 80, []byte{55}},
{engine_util.CfWrite, []byte{1, 23}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{3}, 80, []byte{51}},
{engine_util.CfWrite, []byte{3}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{3, 45}, 80, []byte{56}},
{engine_util.CfWrite, []byte{3, 45}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{3, 46}, 80, []byte{57}},
{engine_util.CfWrite, []byte{3, 46}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{3, 47}, 80, []byte{58}},
{engine_util.CfWrite, []byte{3, 47}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{3, 48}, 80, []byte{59}},
{engine_util.CfWrite, []byte{3, 48}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{4}, 80, []byte{52}},
{engine_util.CfWrite, []byte{4}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{120}, 80, []byte{53}},
{engine_util.CfWrite, []byte{120}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfDefault, []byte{199}, 80, []byte{54}},
{engine_util.CfWrite, []byte{199}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
// Committed after 100.
{engine_util.CfDefault, []byte{4, 45}, 110, []byte{58}},
{engine_util.CfWrite, []byte{4, 45}, 116, []byte{1, 0, 0, 0, 0, 0, 0, 0, 110}},
{engine_util.CfDefault, []byte{4, 46}, 110, []byte{57}},
{engine_util.CfWrite, []byte{4, 46}, 116, []byte{1, 0, 0, 0, 0, 0, 0, 0, 110}},
{engine_util.CfDefault, []byte{4, 47}, 110, []byte{58}},
{engine_util.CfWrite, []byte{4, 47}, 116, []byte{1, 0, 0, 0, 0, 0, 0, 0, 110}},
{engine_util.CfDefault, []byte{4, 48}, 110, []byte{59}},
{engine_util.CfWrite, []byte{4, 48}, 116, []byte{1, 0, 0, 0, 0, 0, 0, 0, 110}},
// Committed after 100, but started before.
{engine_util.CfDefault, []byte{5, 45}, 97, []byte{60}},
{engine_util.CfWrite, []byte{5, 45}, 101, []byte{1, 0, 0, 0, 0, 0, 0, 0, 97}},
{engine_util.CfDefault, []byte{5, 46}, 97, []byte{61}},
{engine_util.CfWrite, []byte{5, 46}, 101, []byte{1, 0, 0, 0, 0, 0, 0, 0, 97}},
{engine_util.CfDefault, []byte{5, 47}, 97, []byte{62}},
{engine_util.CfWrite, []byte{5, 47}, 101, []byte{1, 0, 0, 0, 0, 0, 0, 0, 97}},
{engine_util.CfDefault, []byte{5, 48}, 97, []byte{63}},
{engine_util.CfWrite, []byte{5, 48}, 101, []byte{1, 0, 0, 0, 0, 0, 0, 0, 97}},
// A deleted value and replaced value.
{engine_util.CfDefault, []byte{150}, 80, []byte{42}},
{engine_util.CfWrite, []byte{150}, 99, []byte{1, 0, 0, 0, 0, 0, 0, 0, 80}},
{engine_util.CfWrite, []byte{150}, 101, []byte{2, 0, 0, 0, 0, 0, 0, 0, 97}},
{engine_util.CfDefault, []byte{150}, 110, []byte{64}},
{engine_util.CfWrite, []byte{150}, 116, []byte{1, 0, 0, 0, 0, 0, 0, 0, 110}},
}
builder := newBuilder(t)
builder.init(values)
return &builder
}

View File

@ -0,0 +1,211 @@
package transaction
// This file contains utility code for testing commands.
import (
"context"
"encoding/binary"
"fmt"
"reflect"
"strings"
"testing"
"github.com/pingcap-incubator/tinykv/kv/server"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/pingcap-incubator/tinykv/kv/transaction/mvcc"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
"github.com/stretchr/testify/assert"
)
// testBuilder is a helper type for running command tests.
type testBuilder struct {
t *testing.T
server *server.Server
// mem will always be the backing store for server.
mem *storage.MemStorage
// Keep track of timestamps.
prevTs uint64
}
// kv is a type which identifies a key/value pair to testBuilder.
type kv struct {
cf string
// The user key (unencoded, no time stamp).
key []byte
	// Can be elided. The builder's prevTs will be used if the ts is needed.
ts uint64
// Can be elided in assertion functions. If elided then testBuilder checks that the value has not changed.
value []byte
}
func newBuilder(t *testing.T) testBuilder {
mem := storage.NewMemStorage()
server := server.NewServer(mem)
server.Latches.Validation = func(txn *mvcc.MvccTxn, keys [][]byte) {
keyMap := make(map[string]struct{})
for _, k := range keys {
keyMap[string(k)] = struct{}{}
}
for _, wr := range txn.Writes() {
key := wr.Key()
// This is a bit of a hack and relies on all the raw tests using keys shorter than 9 bytes, which is the
// minimum length for an encoded key.
if len(key) > 8 {
switch wr.Cf() {
case engine_util.CfDefault:
key = mvcc.DecodeUserKey(wr.Key())
case engine_util.CfWrite:
key = mvcc.DecodeUserKey(wr.Key())
}
}
if _, ok := keyMap[string(key)]; !ok {
t.Errorf("Failed latching validation: tried to write a key which was not latched in %v", wr.Data)
}
}
}
return testBuilder{t, server, mem, 99}
}
// init sets values in the test's DB.
func (builder *testBuilder) init(values []kv) {
for _, kv := range values {
ts := kv.ts
if ts == 0 {
ts = builder.prevTs
}
switch kv.cf {
case engine_util.CfDefault:
builder.mem.Set(kv.cf, mvcc.EncodeKey(kv.key, ts), kv.value)
case engine_util.CfWrite:
builder.mem.Set(kv.cf, mvcc.EncodeKey(kv.key, ts), kv.value)
case engine_util.CfLock:
builder.mem.Set(kv.cf, kv.key, kv.value)
}
}
}
func (builder *testBuilder) runRequests(reqs ...interface{}) []interface{} {
var result []interface{}
for _, req := range reqs {
reqName := fmt.Sprintf("%v", reflect.TypeOf(req))
reqName = strings.TrimPrefix(strings.TrimSuffix(reqName, "Request"), "*kvrpcpb.")
fnName := "Kv" + reqName
serverVal := reflect.ValueOf(builder.server)
fn := serverVal.MethodByName(fnName)
ctxtVal := reflect.ValueOf(context.Background())
reqVal := reflect.ValueOf(req)
results := fn.Call([]reflect.Value{ctxtVal, reqVal})
assert.Nil(builder.t, results[1].Interface())
result = append(result, results[0].Interface())
}
return result
}
// runOneRequest is like runRequests but only runs a single request.
func (builder *testBuilder) runOneRequest(req interface{}) interface{} {
return builder.runRequests(req)[0]
}
func (builder *testBuilder) nextTs() uint64 {
builder.prevTs++
return builder.prevTs
}
// ts returns the most recent timestamp used by testBuilder as a byte.
func (builder *testBuilder) ts() byte {
return byte(builder.prevTs)
}
// assert checks that each key/value pair exists and has the given value; if no value is given, it checks that the pair is unchanged.
func (builder *testBuilder) assert(kvs []kv) {
for _, kv := range kvs {
var key []byte
ts := kv.ts
if ts == 0 {
ts = builder.prevTs
}
switch kv.cf {
case engine_util.CfDefault:
key = mvcc.EncodeKey(kv.key, ts)
case engine_util.CfWrite:
key = mvcc.EncodeKey(kv.key, ts)
case engine_util.CfLock:
key = kv.key
}
if kv.value == nil {
assert.False(builder.t, builder.mem.HasChanged(kv.cf, key))
} else {
assert.Equal(builder.t, kv.value, builder.mem.Get(kv.cf, key))
}
}
}
// assertLen asserts the size of one of the column families.
func (builder *testBuilder) assertLen(cf string, size int) {
assert.Equal(builder.t, size, builder.mem.Len(cf))
}
// assertLens asserts the size of each column family.
func (builder *testBuilder) assertLens(def int, lock int, write int) {
builder.assertLen(engine_util.CfDefault, def)
builder.assertLen(engine_util.CfLock, lock)
builder.assertLen(engine_util.CfWrite, write)
}
func (builder *testBuilder) prewriteRequest(muts ...*kvrpcpb.Mutation) *kvrpcpb.PrewriteRequest {
var req kvrpcpb.PrewriteRequest
req.PrimaryLock = []byte{1}
req.StartVersion = builder.nextTs()
req.Mutations = muts
return &req
}
func mutation(key byte, value []byte, op kvrpcpb.Op) *kvrpcpb.Mutation {
var mut kvrpcpb.Mutation
mut.Key = []byte{key}
mut.Value = value
mut.Op = op
return &mut
}
func (builder *testBuilder) commitRequest(keys ...[]byte) *kvrpcpb.CommitRequest {
var req kvrpcpb.CommitRequest
req.StartVersion = builder.nextTs()
req.CommitVersion = builder.prevTs + 10
req.Keys = keys
return &req
}
func (builder *testBuilder) rollbackRequest(keys ...[]byte) *kvrpcpb.BatchRollbackRequest {
var req kvrpcpb.BatchRollbackRequest
req.StartVersion = builder.nextTs()
req.Keys = keys
return &req
}
func (builder *testBuilder) checkTxnStatusRequest(key []byte) *kvrpcpb.CheckTxnStatusRequest {
var req kvrpcpb.CheckTxnStatusRequest
builder.nextTs()
req.LockTs = binary.BigEndian.Uint64([]byte{0, 0, 5, 0, 0, 0, 0, builder.ts()})
req.CurrentTs = binary.BigEndian.Uint64([]byte{0, 0, 6, 0, 0, 0, 0, builder.ts()})
req.PrimaryKey = key
return &req
}
func resolveRequest(startTs uint64, commitTs uint64) *kvrpcpb.ResolveLockRequest {
var req kvrpcpb.ResolveLockRequest
req.StartVersion = startTs
req.CommitVersion = commitTs
return &req
}
func (builder *testBuilder) scanRequest(startKey []byte, limit uint32) *kvrpcpb.ScanRequest {
var req kvrpcpb.ScanRequest
req.StartKey = startKey
req.Limit = limit
req.Version = builder.nextTs()
return &req
}
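
A hypothetical example (not part of this commit) of how the harness above is intended to be used: the test below would live in the same `transaction` package and only passes once the project's 4A/4B server code is implemented. The key {3}, value {42}, and test name are illustrative.

```go
func TestPrewriteSketch(t *testing.T) {
	builder := newBuilder(t)
	// prewriteRequest picks the next timestamp (100) as the start version.
	cmd := builder.prewriteRequest(mutation(3, []byte{42}, kvrpcpb.Op_Put))
	resp := builder.runOneRequest(cmd).(*kvrpcpb.PrewriteResponse)
	assert.Empty(t, resp.Errors)
	// One uncommitted value in the default CF, one lock, nothing in the write CF yet.
	builder.assertLens(1, 1, 0)
	builder.assert([]kv{
		{cf: engine_util.CfDefault, key: []byte{3}, value: []byte{42}},
	})
}
```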

40
kv/transaction/doc.go Normal file
View File

@ -0,0 +1,40 @@
package transaction
// The transaction package implements TinyKV's 'transaction' layer. This takes incoming requests from the kv/server/server.go
// as input and turns them into reads and writes of the underlying key/value store (defined by Storage in kv/storage/storage.go).
// The storage engine handles communicating with other nodes and writing data to disk. The transaction layer must
// translate high-level TinyKV commands into low-level raw key/value commands and ensure that processing one command
// does not interfere with processing other commands.
//
// Note that there are two kinds of transactions in play: TinySQL transactions are collaborative between TinyKV and its
// client (e.g., TinySQL). They are implemented using multiple TinyKV requests and ensure that multiple SQL commands can
// be executed atomically. There are also mvcc transactions which are an implementation detail of this
// layer in TinyKV (represented by MvccTxn in kv/transaction/mvcc/transaction.go). These ensure that a *single* request
// is executed atomically.
//
// *Locks* are used to implement TinySQL transactions. Setting or checking a lock in a TinySQL transaction is lowered to
// writing to the underlying store.
//
// *Latches* are used to implement mvcc transactions and are not visible to the client. They are stored outside the
// underlying storage (or equivalently, you can think of every key having its own latch). See the latches package for details.
//
// Within the `mvcc` package, `Lock` and `Write` provide abstractions for lowering locks and writes into simple keys and values.
//
// ## Encoding user key/values
//
// The mvcc strategy is essentially to store all data (committed and uncommitted) at every point in time. So for example, if we store
// a value for a key, then store another value (a logical overwrite) at a later time, both values are preserved in the underlying
// storage.
//
// This is implemented by encoding user keys with their timestamps (the starting timestamp of the transaction in which they are
// written) to make an encoded key (see codec.go). The `default` CF is a mapping from encoded keys to their values.
//
// Locking a key means writing into the `lock` CF. In this CF, we use the user key (i.e., not the encoded key so that a key is locked
// for all timestamps). The value in the `lock` CF consists of the 'primary key' for the transaction, the kind of lock (for 'put',
// 'delete', or 'rollback'), the start timestamp of the transaction, and the lock's ttl (time to live). See lock.go for the
// implementation.
//
// The status of values is stored in the `write` CF. Here we map keys encoded with their commit timestamps (i.e., the time at which
// a transaction is committed) to a value containing the transaction's starting timestamp, and the kind of write ('put', 'delete', or
// 'rollback'). Note that for transactions which are rolled back, the start timestamp is used for the commit timestamp in the encoded
// key.
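
To make the encoding described above concrete, here is a small standalone sketch (an illustration, not part of the commit; the key, value, and timestamps are made up) that prints the raw key/value pairs a single put, started at ts 100 and committed at ts 110, occupies in the three CFs.

```go
package main

import (
	"fmt"

	"github.com/pingcap-incubator/tinykv/kv/transaction/mvcc"
	"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
)

func main() {
	userKey := []byte{42}
	startTs, commitTs := uint64(100), uint64(110)

	// default CF: encoded (key, start ts) -> the user value.
	fmt.Println(engine_util.CfDefault, mvcc.EncodeKey(userKey, startTs), []byte("value"))

	// lock CF: raw user key -> serialized Lock (present only while the txn is in flight).
	lock := mvcc.Lock{Primary: userKey, Ts: startTs, Ttl: 3000, Kind: mvcc.WriteKindPut}
	fmt.Println(engine_util.CfLock, userKey, lock.ToBytes())

	// write CF: encoded (key, commit ts) -> Write recording the start ts and kind.
	write := mvcc.Write{StartTS: startTs, Kind: mvcc.WriteKindPut}
	fmt.Println(engine_util.CfWrite, mvcc.EncodeKey(userKey, commitTs), write.ToBytes())
}
```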

View File

@ -0,0 +1,99 @@
package latches
import (
"sync"
"github.com/pingcap-incubator/tinykv/kv/transaction/mvcc"
)
// Latching provides atomicity of TinyKV commands. This should not be confused with SQL transactions which provide atomicity
// for multiple TinyKV commands. For example, consider two commit commands: these write to multiple keys/CFs, so if they race,
// then it is possible for inconsistent data to be written. By latching the keys each command might write, we ensure that the
// two commands will not race to write the same keys.
//
// A latch is a per-key lock. There is only one latch per user key, not one per CF or one for each encoded key. Latches are
// only needed for writing. Only one thread can hold a latch at a time and all keys that a command might write must be locked
// at once.
//
// Latching is implemented using a single map which maps keys to a Go WaitGroup. Access to this map is guarded by a mutex
// to ensure that latching is atomic and consistent. Since the mutex is a global lock, it would cause intolerable contention
// in a real system.
type Latches struct {
// Before modifying any property of a key, the thread must have the latch for that key. `Latches` maps each latched
// key to a WaitGroup. Threads that find a key locked should wait on that WaitGroup.
latchMap map[string]*sync.WaitGroup
// Mutex to guard latchMap. A thread must hold this mutex while it makes any change to latchMap.
latchGuard sync.Mutex
// An optional validation function, only used for testing.
Validation func(txn *mvcc.MvccTxn, keys [][]byte)
}
// NewLatches creates a new Latches object for managing a database's latches. There should only be one such object, shared
// between all threads.
func NewLatches() *Latches {
l := new(Latches)
l.latchMap = make(map[string]*sync.WaitGroup)
return l
}
// AcquireLatches tries to lock all latches specified by keys. If this succeeds, nil is returned. If any of the keys are
// locked, then AcquireLatches returns a WaitGroup which the thread can use to be woken when the lock is free.
func (l *Latches) AcquireLatches(keysToLatch [][]byte) *sync.WaitGroup {
l.latchGuard.Lock()
defer l.latchGuard.Unlock()
// Check none of the keys we want to write are locked.
for _, key := range keysToLatch {
if latchWg, ok := l.latchMap[string(key)]; ok {
// Return a wait group to wait on.
return latchWg
}
}
// All Latches are available, lock them all with a new wait group.
wg := new(sync.WaitGroup)
wg.Add(1)
for _, key := range keysToLatch {
l.latchMap[string(key)] = wg
}
return nil
}
// ReleaseLatches releases the latches for all keys in keysToUnlatch. It will wake up any threads blocked on one of the
// latches. All keys in keysToUnlatch must have been locked together in one call to AcquireLatches.
func (l *Latches) ReleaseLatches(keysToUnlatch [][]byte) {
l.latchGuard.Lock()
defer l.latchGuard.Unlock()
first := true
for _, key := range keysToUnlatch {
if first {
wg := l.latchMap[string(key)]
wg.Done()
first = false
}
delete(l.latchMap, string(key))
}
}
// WaitForLatches attempts to lock all keys in keysToLatch using AcquireLatches. If a latch is already locked, then
// WaitForLatches will wait for it to become unlocked then try again. Therefore WaitForLatches may block for an unbounded
// length of time.
func (l *Latches) WaitForLatches(keysToLatch [][]byte) {
for {
wg := l.AcquireLatches(keysToLatch)
if wg == nil {
return
}
wg.Wait()
}
}
// Validate calls the function in Validation, if it exists.
func (l *Latches) Validate(txn *mvcc.MvccTxn, latched [][]byte) {
if l.Validation != nil {
l.Validation(txn, latched)
}
}
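
A minimal usage sketch (assumed, not from the commit) of the API above: a command latches every key it will write, does its work, then releases.

```go
package main

import "github.com/pingcap-incubator/tinykv/kv/transaction/latches"

func main() {
	l := latches.NewLatches()
	keys := [][]byte{[]byte("a"), []byte("b")}

	// Block until every key is latched; concurrent commands touching these keys wait.
	l.WaitForLatches(keys)
	defer l.ReleaseLatches(keys)

	// ... build an MvccTxn, stage writes for "a" and "b", flush them to storage ...
}
```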

View File

@ -0,0 +1,30 @@
package latches
import (
"github.com/stretchr/testify/assert"
"sync"
"testing"
)
func TestAcquireLatches(t *testing.T) {
l := Latches{
latchMap: make(map[string]*sync.WaitGroup),
}
// Acquiring a new latch is ok.
wg := l.AcquireLatches([][]byte{{}, {3}, {3, 0, 42}})
assert.Nil(t, wg)
// Can only acquire once.
wg = l.AcquireLatches([][]byte{{}})
assert.NotNil(t, wg)
wg = l.AcquireLatches([][]byte{{3, 0, 42}})
assert.NotNil(t, wg)
// Release then acquire is ok.
l.ReleaseLatches([][]byte{{3}, {3, 0, 43}})
wg = l.AcquireLatches([][]byte{{3}})
assert.Nil(t, wg)
wg = l.AcquireLatches([][]byte{{3, 0, 42}})
assert.NotNil(t, wg)
}

105
kv/transaction/mvcc/lock.go Normal file
View File

@ -0,0 +1,105 @@
package mvcc
import (
"bytes"
"encoding/binary"
"fmt"
"reflect"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
)
const TsMax uint64 = ^uint64(0)
type Lock struct {
Primary []byte
Ts uint64
Ttl uint64
Kind WriteKind
}
type KlPair struct {
Key []byte
Lock *Lock
}
// Info creates a LockInfo object from a Lock object for key.
func (lock *Lock) Info(key []byte) *kvrpcpb.LockInfo {
info := kvrpcpb.LockInfo{}
info.Key = key
info.LockVersion = lock.Ts
info.PrimaryLock = lock.Primary
info.LockTtl = lock.Ttl
return &info
}
func (lock *Lock) ToBytes() []byte {
buf := append(lock.Primary, byte(lock.Kind))
buf = append(buf, make([]byte, 16)...)
binary.BigEndian.PutUint64(buf[len(lock.Primary)+1:], lock.Ts)
binary.BigEndian.PutUint64(buf[len(lock.Primary)+9:], lock.Ttl)
return buf
}
// ParseLock attempts to parse a byte string into a Lock object.
func ParseLock(input []byte) (*Lock, error) {
if len(input) <= 16 {
return nil, fmt.Errorf("mvcc: error parsing lock, not enough input, found %d bytes", len(input))
}
primaryLen := len(input) - 17
primary := input[:primaryLen]
kind := WriteKind(input[primaryLen])
ts := binary.BigEndian.Uint64(input[primaryLen+1:])
ttl := binary.BigEndian.Uint64(input[primaryLen+9:])
return &Lock{Primary: primary, Ts: ts, Ttl: ttl, Kind: kind}, nil
}
// IsLockedFor checks if lock locks key at txnStartTs.
func (lock *Lock) IsLockedFor(key []byte, txnStartTs uint64, resp interface{}) bool {
if lock == nil {
return false
}
if txnStartTs == TsMax && bytes.Compare(key, lock.Primary) != 0 {
return false
}
if lock.Ts <= txnStartTs {
err := &kvrpcpb.KeyError{Locked: lock.Info(key)}
respValue := reflect.ValueOf(resp)
reflect.Indirect(respValue).FieldByName("Error").Set(reflect.ValueOf(err))
return true
}
return false
}
// AllLocksForTxn returns all locks for the current transaction.
func AllLocksForTxn(txn *MvccTxn) ([]KlPair, error) {
var result []KlPair
for iter := txn.Reader().IterCF(engine_util.CfLock); iter.Valid(); iter.Next() {
item := iter.Item()
val, err := item.Value()
if err != nil {
return nil, err
}
lock, err := ParseLock(val)
if err != nil {
return nil, err
}
if lock.Ts == txn.StartTS() {
result = append(result, KlPair{item.Key(), lock})
}
}
return result, nil
}
func LockedError(info ...kvrpcpb.LockInfo) []*kvrpcpb.KeyError {
var result []*kvrpcpb.KeyError
for _, i := range info {
var ke kvrpcpb.KeyError
ke.Locked = &i
result = append(result, &ke)
}
return result
}
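
A short sketch (illustrative, not in the commit) showing the serialization format implemented by ToBytes and ParseLock above round-tripping a lock.

```go
package main

import (
	"fmt"

	"github.com/pingcap-incubator/tinykv/kv/transaction/mvcc"
)

func main() {
	in := mvcc.Lock{Primary: []byte{1, 2, 3}, Ts: 100, Ttl: 3000, Kind: mvcc.WriteKindPut}
	buf := in.ToBytes() // primary bytes, 1-byte kind, 8-byte ts, 8-byte ttl
	out, err := mvcc.ParseLock(buf)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", *out) // {Primary:[1 2 3] Ts:100 Ttl:3000 Kind:1}
}
```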

View File

@ -0,0 +1,111 @@
package mvcc
import (
"encoding/binary"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/pingcap-incubator/tinykv/kv/util/codec"
)
// MvccTxn groups together writes as part of a single transaction. It also provides an abstraction over low-level
// storage, lowering the concepts of timestamps, writes, and locks into plain keys and values.
type MvccTxn struct {
// Your code here (4a).
}
func (txn *MvccTxn) Reader() storage.StorageReader {
// Your code here (4a).
return nil
}
func (txn *MvccTxn) StartTS() uint64 {
// Your code here (4a).
return 0
}
// Writes returns all changes added to this transaction.
func (txn *MvccTxn) Writes() []storage.Modify {
// Your code here (4a).
return nil
}
// PutWrite records a write at key and ts.
func (txn *MvccTxn) PutWrite(key []byte, ts uint64, write *Write) {
// Your code here (4a).
}
// GetLock returns a lock if key is locked. It will return (nil, nil) if there is no lock on key, and (nil, err)
// if an error occurs during lookup.
func (txn *MvccTxn) GetLock(key []byte) (*Lock, error) {
// Your code here (4a).
return nil, nil
}
// PutLock adds a key/lock to this transaction.
func (txn *MvccTxn) PutLock(key []byte, lock *Lock) {
// Your code here (4a).
}
// DeleteLock adds a delete lock to this transaction.
func (txn *MvccTxn) DeleteLock(key []byte) {
// Your code here (4a).
}
// GetValue finds the value for key, valid at the start timestamp of this transaction.
// I.e., the most recent value committed before the start of this transaction.
func (txn *MvccTxn) GetValue(key []byte) ([]byte, error) {
// Your code here (4a).
return nil, nil
}
// PutValue adds a key/value write to this transaction.
func (txn *MvccTxn) PutValue(key []byte, value []byte) {
// Your code here (4a).
}
// DeleteValue removes a key/value pair in this transaction.
func (txn *MvccTxn) DeleteValue(key []byte) {
// Your code here (4a).
}
// CurrentWrite searches for a write with this transaction's start timestamp. It returns a Write from the DB and that
// write's commit timestamp, or an error.
func (txn *MvccTxn) CurrentWrite(key []byte) (*Write, uint64, error) {
// Your code here (4a).
return nil, 0, nil
}
// MostRecentWrite finds the most recent write with the given key. It returns a Write from the DB and that
// write's commit timestamp, or an error.
func (txn *MvccTxn) MostRecentWrite(key []byte) (*Write, uint64, error) {
// Your code here (4a).
return nil, 0, nil
}
// EncodeKey encodes a user key and appends an encoded timestamp to a key. Keys and timestamps are encoded so that
// timestamped keys are sorted first by key (ascending), then by timestamp (descending). The encoding is based on
// https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format.
func EncodeKey(key []byte, ts uint64) []byte {
encodedKey := codec.EncodeBytes(key)
newKey := append(encodedKey, make([]byte, 8)...)
binary.BigEndian.PutUint64(newKey[len(encodedKey):], ^ts)
return newKey
}
// DecodeUserKey takes a key + timestamp and returns the key part.
func DecodeUserKey(key []byte) []byte {
_, userKey, err := codec.DecodeBytes(key)
if err != nil {
panic(err)
}
return userKey
}
// decodeTimestamp takes a key + timestamp and returns the timestamp part.
func decodeTimestamp(key []byte) uint64 {
left, _, err := codec.DecodeBytes(key)
if err != nil {
panic(err)
}
return ^binary.BigEndian.Uint64(left)
}

View File

@ -0,0 +1,296 @@
package mvcc
import (
"bytes"
"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
"testing"
"github.com/pingcap-incubator/tinykv/kv/storage"
"github.com/stretchr/testify/assert"
)
func TestEncodeKey(t *testing.T) {
assert.Equal(t, []byte{0, 0, 0, 0, 0, 0, 0, 0, 247, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, EncodeKey([]byte{}, 0))
assert.Equal(t, []byte{42, 0, 0, 0, 0, 0, 0, 0, 248, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, EncodeKey([]byte{42}, 0))
assert.Equal(t, []byte{42, 0, 5, 0, 0, 0, 0, 0, 250, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, EncodeKey([]byte{42, 0, 5}, 0))
assert.Equal(t, []byte{42, 0, 0, 0, 0, 0, 0, 0, 248, 0, 0, 39, 154, 52, 120, 65, 255}, EncodeKey([]byte{42}, ^uint64(43543258743295)))
assert.Equal(t, []byte{42, 0, 5, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 5, 226, 221, 76}, EncodeKey([]byte{42, 0, 5}, ^uint64(98753868)))
// Test that encoded keys are in descending order.
assert.True(t, bytes.Compare(EncodeKey([]byte{42}, 238), EncodeKey([]byte{200}, 0)) < 0)
assert.True(t, bytes.Compare(EncodeKey([]byte{42}, 238), EncodeKey([]byte{42, 0}, 0)) < 0)
}
func TestDecodeKey(t *testing.T) {
assert.Equal(t, []byte{}, DecodeUserKey(EncodeKey([]byte{}, 0)))
assert.Equal(t, []byte{42}, DecodeUserKey(EncodeKey([]byte{42}, 0)))
assert.Equal(t, []byte{42, 0, 5}, DecodeUserKey(EncodeKey([]byte{42, 0, 5}, 0)))
assert.Equal(t, []byte{42}, DecodeUserKey(EncodeKey([]byte{42}, 2342342355436234)))
assert.Equal(t, []byte{42, 0, 5}, DecodeUserKey(EncodeKey([]byte{42, 0, 5}, 234234)))
}
func testTxn(startTs uint64, f func(m *storage.MemStorage)) MvccTxn {
mem := storage.NewMemStorage()
if f != nil {
f(mem)
}
reader, _ := mem.Reader(&kvrpcpb.Context{})
return NewTxn(reader, startTs)
}
func assertPutInTxn(t *testing.T, txn *MvccTxn, key []byte, value []byte, cf string) {
writes := txn.Writes()
assert.Equal(t, 1, len(writes))
expected := storage.Put{Cf: cf, Key: key, Value: value}
put, ok := writes[0].Data.(storage.Put)
assert.True(t, ok)
assert.Equal(t, expected, put)
}
func assertDeleteInTxn(t *testing.T, txn *MvccTxn, key []byte, cf string) {
writes := txn.Writes()
assert.Equal(t, 1, len(writes))
expected := storage.Delete{Cf: cf, Key: key}
del, ok := writes[0].Data.(storage.Delete)
assert.True(t, ok)
assert.Equal(t, expected, del)
}
func TestPutLock4A(t *testing.T) {
txn := testTxn(42, nil)
lock := Lock{
Primary: []byte{16},
Ts: 100,
Ttl: 100000,
Kind: WriteKindRollback,
}
txn.PutLock([]byte{1}, &lock)
assertPutInTxn(t, &txn, []byte{1}, lock.ToBytes(), engine_util.CfLock)
}
func TestPutWrite4A(t *testing.T) {
txn := testTxn(0, nil)
write := Write{
StartTS: 100,
Kind: WriteKindDelete,
}
txn.PutWrite([]byte{16, 240}, 0, &write)
assertPutInTxn(t, &txn, EncodeKey([]byte{16, 240}, 0), write.ToBytes(), engine_util.CfWrite)
}
func TestPutValue4A(t *testing.T) {
txn := testTxn(453325345, nil)
value := []byte{1, 1, 2, 3, 5, 8, 13}
txn.PutValue([]byte{32}, value)
assertPutInTxn(t, &txn, EncodeKey([]byte{32}, 453325345), value, engine_util.CfDefault)
}
func TestGetLock4A(t *testing.T) {
lock := Lock{
Primary: []byte{16},
Ts: 100,
Ttl: 100000,
Kind: WriteKindRollback,
}
txn := testTxn(42, func(m *storage.MemStorage) {
m.Set(engine_util.CfLock, []byte{1}, lock.ToBytes())
})
gotLock, err := txn.GetLock([]byte{1})
assert.Nil(t, err)
assert.Equal(t, lock, *gotLock)
}
func TestDeleteLock4A(t *testing.T) {
txn := testTxn(42, nil)
txn.DeleteLock([]byte{1})
assertDeleteInTxn(t, &txn, []byte{1}, engine_util.CfLock)
}
func TestDeleteValue4A(t *testing.T) {
txn := testTxn(63454245, nil)
txn.DeleteValue([]byte{17, 255, 0})
assertDeleteInTxn(t, &txn, EncodeKey([]byte{17, 255, 0}, 63454245), engine_util.CfDefault)
}
func singleEntry(m *storage.MemStorage) {
m.Set(engine_util.CfDefault, EncodeKey([]byte{16, 240}, 40), []byte{1, 2, 3})
write := Write{
StartTS: 40,
Kind: WriteKindPut,
}
m.Set(engine_util.CfWrite, EncodeKey([]byte{16, 240}, 42), write.ToBytes())
}
func TestGetValueSimple4A(t *testing.T) {
txn := testTxn(43, singleEntry)
value, err := txn.GetValue([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, []byte{1, 2, 3}, value)
}
func TestGetValueMissing4A(t *testing.T) {
txn := testTxn(43, singleEntry)
value, err := txn.GetValue([]byte{16, 241})
assert.Nil(t, err)
assert.Equal(t, []byte(nil), value)
}
func TestGetValueTooEarly4A(t *testing.T) {
txn := testTxn(41, singleEntry)
value, err := txn.GetValue([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, []byte(nil), value)
}
func twoEntries(m *storage.MemStorage) {
m.Set(engine_util.CfDefault, EncodeKey([]byte{16, 240}, 40), []byte{1, 2, 3})
write1 := Write{
StartTS: 40,
Kind: WriteKindPut,
}
m.Set(engine_util.CfWrite, EncodeKey([]byte{16, 240}, 42), write1.ToBytes())
m.Set(engine_util.CfDefault, EncodeKey([]byte{16, 240}, 50), []byte{255, 0, 255})
write2 := Write{
StartTS: 50,
Kind: WriteKindPut,
}
m.Set(engine_util.CfWrite, EncodeKey([]byte{16, 240}, 52), write2.ToBytes())
}
func TestGetValueOverwritten4A(t *testing.T) {
txn := testTxn(52, twoEntries)
value, err := txn.GetValue([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, []byte{255, 0, 255}, value)
}
func TestGetValueNotOverwritten4A(t *testing.T) {
txn := testTxn(50, twoEntries)
value, err := txn.GetValue([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, []byte{1, 2, 3}, value)
}
func deleted(m *storage.MemStorage) {
m.Set(engine_util.CfDefault, EncodeKey([]byte{16, 240}, 40), []byte{1, 2, 3})
write1 := Write{
StartTS: 40,
Kind: WriteKindPut,
}
m.Set(engine_util.CfWrite, EncodeKey([]byte{16, 240}, 42), write1.ToBytes())
write2 := Write{
StartTS: 50,
Kind: WriteKindDelete,
}
m.Set(engine_util.CfWrite, EncodeKey([]byte{16, 240}, 52), write2.ToBytes())
}
func TestGetValueDeleted4A(t *testing.T) {
txn := testTxn(500, deleted)
value, err := txn.GetValue([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, []byte(nil), value)
}
func TestGetValueNotDeleted4A(t *testing.T) {
txn := testTxn(45, deleted)
value, err := txn.GetValue([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, []byte{1, 2, 3}, value)
}
func TestCurrentWrite4A(t *testing.T) {
txn := testTxn(50, twoEntries)
write, ts, err := txn.CurrentWrite([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, Write{
StartTS: 50,
Kind: WriteKindPut,
}, *write)
assert.Equal(t, uint64(52), ts)
txn.StartTS = 40
write, ts, err = txn.CurrentWrite([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, Write{
StartTS: 40,
Kind: WriteKindPut,
}, *write)
assert.Equal(t, uint64(42), ts)
txn.StartTS = 41
write, ts, err = txn.CurrentWrite([]byte{16, 240})
assert.Nil(t, err)
var noWrite *Write
assert.Equal(t, noWrite, write)
assert.Equal(t, uint64(0), ts)
}
func TestMostRecentWrite4A(t *testing.T) {
// Empty DB.
txn := testTxn(50, nil)
write, ts, err := txn.MostRecentWrite([]byte{16, 240})
assert.Nil(t, write)
assert.Equal(t, uint64(0), ts)
assert.Nil(t, err)
// Simple case - key exists.
txn = testTxn(50, twoEntries)
write, ts, err = txn.MostRecentWrite([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, Write{
StartTS: 50,
Kind: WriteKindPut,
}, *write)
assert.Equal(t, uint64(52), ts)
// No entry for other keys.
write, ts, err = txn.MostRecentWrite([]byte{16})
assert.Nil(t, write)
assert.Equal(t, uint64(0), ts)
assert.Nil(t, err)
// Deleted key.
txn = testTxn(50, deleted)
write, ts, err = txn.MostRecentWrite([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, Write{
StartTS: 50,
Kind: WriteKindDelete,
}, *write)
assert.Equal(t, uint64(52), ts)
// Result does not depend on txn ts.
txn = testTxn(5000, twoEntries)
write, ts, err = txn.MostRecentWrite([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, Write{
StartTS: 50,
Kind: WriteKindPut,
}, *write)
assert.Equal(t, uint64(52), ts)
// Result does not depend on txn ts.
txn = testTxn(1, twoEntries)
write, ts, err = txn.MostRecentWrite([]byte{16, 240})
assert.Nil(t, err)
assert.Equal(t, Write{
StartTS: 50,
Kind: WriteKindPut,
}, *write)
assert.Equal(t, uint64(52), ts)
}

View File

@ -0,0 +1,69 @@
package mvcc
import (
"encoding/binary"
"fmt"
"github.com/pingcap-incubator/tinykv/proto/pkg/kvrpcpb"
)
// Write is a representation of a committed write to backing storage.
// A serialized version is stored in the "write" CF of our engine when a write is committed. That allows MvccTxn to find
// the status of a key at a given timestamp.
type Write struct {
StartTS uint64
Kind WriteKind
}
func (wr *Write) ToBytes() []byte {
buf := append([]byte{byte(wr.Kind)}, 0, 0, 0, 0, 0, 0, 0, 0)
binary.BigEndian.PutUint64(buf[1:], wr.StartTS)
return buf
}
func ParseWrite(value []byte) (*Write, error) {
if value == nil {
return nil, nil
}
if len(value) != 9 {
return nil, fmt.Errorf("mvcc/write/ParseWrite: value is incorrect length, expected 9, found %d", len(value))
}
kind := value[0]
startTs := binary.BigEndian.Uint64(value[1:])
return &Write{startTs, WriteKind(kind)}, nil
}
type WriteKind int
const (
WriteKindPut WriteKind = 1
WriteKindDelete WriteKind = 2
WriteKindRollback WriteKind = 3
)
func (wk WriteKind) ToProto() kvrpcpb.Op {
switch wk {
case WriteKindPut:
return kvrpcpb.Op_Put
case WriteKindDelete:
return kvrpcpb.Op_Del
case WriteKindRollback:
return kvrpcpb.Op_Rollback
}
return -1
}
func WriteKindFromProto(op kvrpcpb.Op) WriteKind {
switch op {
case kvrpcpb.Op_Put:
return WriteKindPut
case kvrpcpb.Op_Del:
return WriteKindDelete
case kvrpcpb.Op_Rollback:
return WriteKindRollback
}
return -1
}
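
A similar sketch (illustrative, not in the commit) for the write record: serialize it, parse it back, and convert the kind to its protobuf Op.

```go
package main

import (
	"fmt"

	"github.com/pingcap-incubator/tinykv/kv/transaction/mvcc"
)

func main() {
	in := mvcc.Write{StartTS: 100, Kind: mvcc.WriteKindPut}
	buf := in.ToBytes() // 1-byte kind followed by the 8-byte start ts
	out, err := mvcc.ParseWrite(buf)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v %v\n", *out, out.Kind.ToProto()) // {StartTS:100 Kind:1} Put
}
```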

84
kv/util/codec/codec.go Normal file
View File

@ -0,0 +1,84 @@
package codec
import (
"fmt"
"github.com/pingcap/errors"
)
const (
encGroupSize = 8
encMarker = byte(0xFF)
encPad = byte(0x0)
)
var pads = make([]byte, encGroupSize)
// EncodeBytes guarantees the encoded value is in ascending order for comparison,
// encoding with the following rule:
// [group1][marker1]...[groupN][markerN]
// group is an 8-byte slice which is padded with 0.
// marker is `0xFF - padding 0 count`
// For example:
// [] -> [0, 0, 0, 0, 0, 0, 0, 0, 247]
// [1, 2, 3] -> [1, 2, 3, 0, 0, 0, 0, 0, 250]
// [1, 2, 3, 0] -> [1, 2, 3, 0, 0, 0, 0, 0, 251]
// [1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247]
// Refer: https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format
func EncodeBytes(data []byte) []byte {
// Allocate more space to avoid unnecessary slice growing.
// Assume that the byte slice size is about `(len(data) / encGroupSize + 1) * (encGroupSize + 1)` bytes,
// that is `(len(data) / 8 + 1) * 9` in our implementation.
dLen := len(data)
result := make([]byte, 0, (dLen/encGroupSize+1)*(encGroupSize+1)+8) // make extra room for appending ts
for idx := 0; idx <= dLen; idx += encGroupSize {
remain := dLen - idx
padCount := 0
if remain >= encGroupSize {
result = append(result, data[idx:idx+encGroupSize]...)
} else {
padCount = encGroupSize - remain
result = append(result, data[idx:]...)
result = append(result, pads[:padCount]...)
}
marker := encMarker - byte(padCount)
result = append(result, marker)
}
return result
}
// DecodeBytes decodes bytes which is encoded by EncodeBytes before,
// returns the leftover bytes and decoded value if no error.
func DecodeBytes(b []byte) ([]byte, []byte, error) {
data := make([]byte, 0, len(b))
for {
if len(b) < encGroupSize+1 {
return nil, nil, fmt.Errorf("insufficient bytes to decode value: %d", len(b))
}
groupBytes := b[:encGroupSize+1]
group := groupBytes[:encGroupSize]
marker := groupBytes[encGroupSize]
padCount := encMarker - marker
if padCount > encGroupSize {
return nil, nil, errors.Errorf("invalid marker byte, group bytes %q", groupBytes)
}
realGroupSize := encGroupSize - padCount
data = append(data, group[:realGroupSize]...)
b = b[encGroupSize+1:]
if padCount != 0 {
// Check validity of padding bytes.
for _, v := range group[realGroupSize:] {
if v != encPad {
return nil, nil, errors.Errorf("invalid padding byte, group bytes %q", groupBytes)
}
}
break
}
}
return b, data, nil
}
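
A brief sketch (assumption, not from the commit) of the memcomparable encoding above: encode a key, decode it back, and check that encoded order matches byte-wise order of the original keys.

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/pingcap-incubator/tinykv/kv/util/codec"
)

func main() {
	enc := codec.EncodeBytes([]byte{1, 2, 3})
	fmt.Println(enc) // [1 2 3 0 0 0 0 0 250]

	leftover, dec, err := codec.DecodeBytes(enc)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(leftover), dec) // 0 [1 2 3]

	// Encoded keys sort in the same order as the original keys.
	fmt.Println(bytes.Compare(codec.EncodeBytes([]byte{1}), codec.EncodeBytes([]byte{1, 0})) < 0) // true
}
```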

View File

@ -0,0 +1,129 @@
package engine_util
import (
"github.com/Connor1996/badger"
)
type CFItem struct {
item *badger.Item
prefixLen int
}
// String returns a string representation of Item
func (i *CFItem) String() string {
return i.item.String()
}
func (i *CFItem) Key() []byte {
return i.item.Key()[i.prefixLen:]
}
func (i *CFItem) KeyCopy(dst []byte) []byte {
return i.item.KeyCopy(dst)[i.prefixLen:]
}
func (i *CFItem) Version() uint64 {
return i.item.Version()
}
func (i *CFItem) IsEmpty() bool {
return i.item.IsEmpty()
}
func (i *CFItem) Value() ([]byte, error) {
return i.item.Value()
}
func (i *CFItem) ValueSize() int {
return i.item.ValueSize()
}
func (i *CFItem) ValueCopy(dst []byte) ([]byte, error) {
return i.item.ValueCopy(dst)
}
func (i *CFItem) IsDeleted() bool {
return i.item.IsDeleted()
}
func (i *CFItem) EstimatedSize() int64 {
return i.item.EstimatedSize()
}
func (i *CFItem) UserMeta() []byte {
return i.item.UserMeta()
}
type BadgerIterator struct {
iter *badger.Iterator
prefix string
}
func NewCFIterator(cf string, txn *badger.Txn) *BadgerIterator {
return &BadgerIterator{
iter: txn.NewIterator(badger.DefaultIteratorOptions),
prefix: cf + "_",
}
}
func (it *BadgerIterator) Item() DBItem {
return &CFItem{
item: it.iter.Item(),
prefixLen: len(it.prefix),
}
}
func (it *BadgerIterator) Valid() bool { return it.iter.ValidForPrefix([]byte(it.prefix)) }
func (it *BadgerIterator) ValidForPrefix(prefix []byte) bool {
return it.iter.ValidForPrefix(append([]byte(it.prefix), prefix...))
}
func (it *BadgerIterator) Close() {
it.iter.Close()
}
func (it *BadgerIterator) Next() {
it.iter.Next()
}
func (it *BadgerIterator) Seek(key []byte) {
it.iter.Seek(append([]byte(it.prefix), key...))
}
func (it *BadgerIterator) Rewind() {
it.iter.Rewind()
}
type DBIterator interface {
// Item returns pointer to the current key-value pair.
Item() DBItem
// Valid returns false when iteration is done.
Valid() bool
// Next would advance the iterator by one. Always check it.Valid() after a Next()
// to ensure you have access to a valid it.Item().
Next()
// Seek would seek to the provided key if present. If absent, it would seek to the next smallest key
// greater than provided.
Seek([]byte)
// Close the iterator
Close()
}
type DBItem interface {
// Key returns the key.
Key() []byte
// KeyCopy returns a copy of the key of the item, writing it to dst slice.
// If nil is passed, or capacity of dst isn't sufficient, a new slice would be allocated and
// returned.
KeyCopy(dst []byte) []byte
// Value retrieves the value of the item.
Value() ([]byte, error)
// ValueSize returns the size of the value.
ValueSize() int
// ValueCopy returns a copy of the value of the item from the value log, writing it to dst slice.
// If nil is passed, or capacity of dst isn't sufficient, a new slice would be allocated and
// returned.
ValueCopy(dst []byte) ([]byte, error)
}

View File

@ -0,0 +1,17 @@
package engine_util
/*
An engine is a low-level system for storing key/value pairs locally (without distribution or any transaction support,
etc.). This package contains code for interacting with such engines.
CF means 'column family'. A good description of column families is given in https://github.com/facebook/rocksdb/wiki/Column-Families
(specifically for RocksDB, but the general concepts are universal). In short, a column family is a key namespace.
Multiple column families are usually implemented as almost separate databases. Importantly each column family can be
configured separately. Writes can be made atomic across column families, which cannot be done for separate databases.
engine_util includes the following packages:
* engines: a data structure for keeping engines required by unistore.
* write_batch: code to batch writes into a single, atomic 'transaction'.
* cf_iterator: code to iterate over a whole column family in badger.
*/
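
A tiny sketch (assumed, not from the commit; the path is made up) of the column-family helpers defined later in this package: each CF is just a key prefix over one badger database, so the same user key can hold different values per CF.

```go
package main

import (
	"fmt"
	"os"

	"github.com/Connor1996/badger"
	"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
)

func main() {
	dir := "/tmp/engine_util_demo"
	if err := os.MkdirAll(dir, 0755); err != nil {
		panic(err)
	}
	opts := badger.DefaultOptions
	opts.Dir, opts.ValueDir = dir, dir
	db, err := badger.Open(opts)
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// Write the same user key into two CFs; they live under different prefixes.
	_ = engine_util.PutCF(db, engine_util.CfDefault, []byte("k"), []byte("v1"))
	_ = engine_util.PutCF(db, engine_util.CfLock, []byte("k"), []byte("v2"))

	v, _ := engine_util.GetCF(db, engine_util.CfDefault, []byte("k"))
	fmt.Println(string(v)) // v1
}
```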

View File

@ -0,0 +1,83 @@
package engine_util
import (
"bytes"
"io/ioutil"
"testing"
"github.com/Connor1996/badger"
"github.com/stretchr/testify/require"
)
func TestEngineUtil(t *testing.T) {
dir, err := ioutil.TempDir("", "engine_util")
require.Nil(t, err)
opts := badger.DefaultOptions
opts.Dir = dir
opts.ValueDir = dir
db, err := badger.Open(opts)
require.Nil(t, err)
batch := new(WriteBatch)
batch.SetCF(CfDefault, []byte("a"), []byte("a1"))
batch.SetCF(CfDefault, []byte("b"), []byte("b1"))
batch.SetCF(CfDefault, []byte("c"), []byte("c1"))
batch.SetCF(CfDefault, []byte("d"), []byte("d1"))
batch.SetCF(CfWrite, []byte("a"), []byte("a2"))
batch.SetCF(CfWrite, []byte("b"), []byte("b2"))
batch.SetCF(CfWrite, []byte("d"), []byte("d2"))
batch.SetCF(CfLock, []byte("a"), []byte("a3"))
batch.SetCF(CfLock, []byte("c"), []byte("c3"))
batch.SetCF(CfDefault, []byte("e"), []byte("e1"))
batch.DeleteCF(CfDefault, []byte("e"))
err = batch.WriteToDB(db)
require.Nil(t, err)
_, err = GetCF(db, CfDefault, []byte("e"))
require.Equal(t, err, badger.ErrKeyNotFound)
txn := db.NewTransaction(false)
defer txn.Discard()
defaultIter := NewCFIterator(CfDefault, txn)
defaultIter.Seek([]byte("a"))
item := defaultIter.Item()
require.True(t, bytes.Equal(item.Key(), []byte("a")))
val, _ := item.Value()
require.True(t, bytes.Equal(val, []byte("a1")))
defaultIter.Next()
item = defaultIter.Item()
require.True(t, bytes.Equal(item.Key(), []byte("b")))
val, _ = item.Value()
require.True(t, bytes.Equal(val, []byte("b1")))
defaultIter.Next()
item = defaultIter.Item()
require.True(t, bytes.Equal(item.Key(), []byte("c")))
val, _ = item.Value()
require.True(t, bytes.Equal(val, []byte("c1")))
defaultIter.Next()
item = defaultIter.Item()
require.True(t, bytes.Equal(item.Key(), []byte("d")))
val, _ = item.Value()
require.True(t, bytes.Equal(val, []byte("d1")))
defaultIter.Next()
require.False(t, defaultIter.Valid())
defaultIter.Close()
writeIter := NewCFIterator(CfWrite, txn)
writeIter.Seek([]byte("b"))
item = writeIter.Item()
require.True(t, bytes.Equal(item.Key(), []byte("b")))
val, _ = item.Value()
require.True(t, bytes.Equal(val, []byte("b2")))
writeIter.Next()
item = writeIter.Item()
require.True(t, bytes.Equal(item.Key(), []byte("d")))
val, _ = item.Value()
require.True(t, bytes.Equal(val, []byte("d2")))
writeIter.Next()
require.False(t, writeIter.Valid())
writeIter.Close()
lockIter := NewCFIterator(CfLock, txn)
lockIter.Seek([]byte("d"))
require.False(t, lockIter.Valid())
lockIter.Close()
}

View File

@ -0,0 +1,82 @@
package engine_util
import (
"os"
"path/filepath"
"github.com/Connor1996/badger"
"github.com/pingcap-incubator/tinykv/kv/config"
"github.com/pingcap-incubator/tinykv/log"
)
// Engines keeps references to and data for the engines used by unistore.
// All engines are badger key/value databases.
// The Path fields are the filesystem paths where the data is stored.
type Engines struct {
// Data, including data which is committed (i.e., committed across other nodes) and un-committed (i.e., only present
// locally).
Kv *badger.DB
KvPath string
// Metadata used by Raft.
Raft *badger.DB
RaftPath string
}
func NewEngines(kvEngine, raftEngine *badger.DB, kvPath, raftPath string) *Engines {
return &Engines{
Kv: kvEngine,
KvPath: kvPath,
Raft: raftEngine,
RaftPath: raftPath,
}
}
func (en *Engines) WriteKV(wb *WriteBatch) error {
return wb.WriteToDB(en.Kv)
}
func (en *Engines) WriteRaft(wb *WriteBatch) error {
return wb.WriteToDB(en.Raft)
}
func (en *Engines) Close() error {
if err := en.Kv.Close(); err != nil {
return err
}
if err := en.Raft.Close(); err != nil {
return err
}
return nil
}
func (en *Engines) Destroy() error {
if err := en.Close(); err != nil {
return err
}
if err := os.RemoveAll(en.KvPath); err != nil {
return err
}
if err := os.RemoveAll(en.RaftPath); err != nil {
return err
}
return nil
}
// CreateDB creates a new Badger DB on disk at subPath.
func CreateDB(subPath string, conf *config.Config) *badger.DB {
opts := badger.DefaultOptions
if subPath == "raft" {
// No need to write blobs for the raft engine because its data will be deleted soon.
opts.ValueThreshold = 0
}
opts.Dir = filepath.Join(conf.DBPath, subPath)
opts.ValueDir = opts.Dir
if err := os.MkdirAll(opts.Dir, os.ModePerm); err != nil {
log.Fatal(err)
}
db, err := badger.Open(opts)
if err != nil {
log.Fatal(err)
}
return db
}
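
A minimal wiring sketch (assumed, not part of the commit; the path is made up and DBPath is the only config field relied on) showing how the two badger databases are created and bundled into an Engines value.

```go
package main

import (
	"path/filepath"

	"github.com/pingcap-incubator/tinykv/kv/config"
	"github.com/pingcap-incubator/tinykv/kv/util/engine_util"
)

func main() {
	conf := &config.Config{DBPath: "/tmp/tinykv-demo"} // DBPath assumed; CreateDB above reads it
	kv := engine_util.CreateDB("kv", conf)
	raft := engine_util.CreateDB("raft", conf)

	engines := engine_util.NewEngines(kv, raft,
		filepath.Join(conf.DBPath, "kv"), filepath.Join(conf.DBPath, "raft"))
	defer engines.Close()
}
```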

108
kv/util/engine_util/util.go Normal file
View File

@ -0,0 +1,108 @@
package engine_util
import (
"bytes"
"github.com/Connor1996/badger"
"github.com/golang/protobuf/proto"
)
func KeyWithCF(cf string, key []byte) []byte {
return append([]byte(cf+"_"), key...)
}
func GetCF(db *badger.DB, cf string, key []byte) (val []byte, err error) {
err = db.View(func(txn *badger.Txn) error {
item, err := txn.Get(KeyWithCF(cf, key))
if err != nil {
return err
}
val, err = item.ValueCopy(val)
return err
})
return
}
func GetCFFromTxn(txn *badger.Txn, cf string, key []byte) (val []byte, err error) {
item, err := txn.Get(KeyWithCF(cf, key))
if err != nil {
return nil, err
}
val, err = item.ValueCopy(val)
return
}
func PutCF(engine *badger.DB, cf string, key []byte, val []byte) error {
return engine.Update(func(txn *badger.Txn) error {
return txn.Set(KeyWithCF(cf, key), val)
})
}
func GetMeta(engine *badger.DB, key []byte, msg proto.Message) error {
var val []byte
err := engine.View(func(txn *badger.Txn) error {
item, err := txn.Get(key)
if err != nil {
return err
}
val, err = item.Value()
return err
})
if err != nil {
return err
}
return proto.Unmarshal(val, msg)
}
func GetMetaFromTxn(txn *badger.Txn, key []byte, msg proto.Message) error {
item, err := txn.Get(key)
if err != nil {
return err
}
val, err := item.Value()
if err != nil {
return err
}
return proto.Unmarshal(val, msg)
}
func PutMeta(engine *badger.DB, key []byte, msg proto.Message) error {
val, err := proto.Marshal(msg)
if err != nil {
return err
}
return engine.Update(func(txn *badger.Txn) error {
return txn.Set(key, val)
})
}
func DeleteRange(db *badger.DB, startKey, endKey []byte) error {
batch := new(WriteBatch)
txn := db.NewTransaction(false)
defer txn.Discard()
for _, cf := range CFs {
deleteRangeCF(txn, batch, cf, startKey, endKey)
}
return batch.WriteToDB(db)
}
func deleteRangeCF(txn *badger.Txn, batch *WriteBatch, cf string, startKey, endKey []byte) {
it := NewCFIterator(cf, txn)
for it.Seek(startKey); it.Valid(); it.Next() {
item := it.Item()
key := item.KeyCopy(nil)
if ExceedEndKey(key, endKey) {
break
}
batch.DeleteCF(cf, key)
}
defer it.Close()
}
func ExceedEndKey(current, endKey []byte) bool {
if len(endKey) == 0 {
return false
}
return bytes.Compare(current, endKey) >= 0
}

View File

@ -0,0 +1,110 @@
package engine_util
import (
"github.com/Connor1996/badger"
"github.com/golang/protobuf/proto"
"github.com/pingcap/errors"
)
type WriteBatch struct {
entries []*badger.Entry
size int
safePoint int
safePointSize int
safePointUndo int
}
const (
CfDefault string = "default"
CfWrite string = "write"
CfLock string = "lock"
)
var CFs [3]string = [3]string{CfDefault, CfWrite, CfLock}
func (wb *WriteBatch) Len() int {
return len(wb.entries)
}
func (wb *WriteBatch) SetCF(cf string, key, val []byte) {
wb.entries = append(wb.entries, &badger.Entry{
Key: KeyWithCF(cf, key),
Value: val,
})
wb.size += len(key) + len(val)
}
func (wb *WriteBatch) DeleteMeta(key []byte) {
wb.entries = append(wb.entries, &badger.Entry{
Key: key,
})
wb.size += len(key)
}
func (wb *WriteBatch) DeleteCF(cf string, key []byte) {
wb.entries = append(wb.entries, &badger.Entry{
Key: KeyWithCF(cf, key),
})
wb.size += len(key)
}
func (wb *WriteBatch) SetMeta(key []byte, msg proto.Message) error {
val, err := proto.Marshal(msg)
if err != nil {
return errors.WithStack(err)
}
wb.entries = append(wb.entries, &badger.Entry{
Key: key,
Value: val,
})
wb.size += len(key) + len(val)
return nil
}
func (wb *WriteBatch) SetSafePoint() {
wb.safePoint = len(wb.entries)
wb.safePointSize = wb.size
}
func (wb *WriteBatch) RollbackToSafePoint() {
wb.entries = wb.entries[:wb.safePoint]
wb.size = wb.safePointSize
}
func (wb *WriteBatch) WriteToDB(db *badger.DB) error {
if len(wb.entries) > 0 {
err := db.Update(func(txn *badger.Txn) error {
for _, entry := range wb.entries {
var err1 error
if len(entry.Value) == 0 {
err1 = txn.Delete(entry.Key)
} else {
err1 = txn.SetEntry(entry)
}
if err1 != nil {
return err1
}
}
return nil
})
if err != nil {
return errors.WithStack(err)
}
}
return nil
}
func (wb *WriteBatch) MustWriteToDB(db *badger.DB) {
err := wb.WriteToDB(db)
if err != nil {
panic(err)
}
}
func (wb *WriteBatch) Reset() {
wb.entries = wb.entries[:0]
wb.size = 0
wb.safePoint = 0
wb.safePointSize = 0
wb.safePointUndo = 0
}

57
kv/util/file.go Normal file
View File

@ -0,0 +1,57 @@
package util
import (
"hash/crc32"
"io"
"os"
"github.com/pingcap/errors"
)
func GetFileSize(path string) (uint64, error) {
fi, err := os.Stat(path)
if err != nil {
return 0, errors.WithStack(err)
}
return uint64(fi.Size()), nil
}
func FileExists(path string) bool {
fi, err := os.Stat(path)
if err != nil {
return false
}
return !fi.IsDir()
}
func DirExists(path string) bool {
fi, err := os.Stat(path)
if err != nil {
return false
}
return fi.IsDir()
}
func DeleteFileIfExists(path string) (bool, error) {
err := os.Remove(path)
if os.IsNotExist(err) {
return false, nil
} else if err != nil {
return false, errors.WithStack(err)
}
return true, nil
}
// CalcCRC32 calculates the given file's CRC32 checksum.
func CalcCRC32(path string) (uint32, error) {
digest := crc32.NewIEEE()
f, err := os.Open(path)
if err != nil {
return 0, errors.WithStack(err)
}
defer f.Close()
_, err = io.Copy(digest, f)
if err != nil {
return 0, errors.WithStack(err)
}
return digest.Sum32(), nil
}

60
kv/util/worker/worker.go Normal file
View File

@ -0,0 +1,60 @@
package worker
import "sync"
type TaskStop struct{}
type Task interface{}
type Worker struct {
name string
sender chan<- Task
receiver <-chan Task
closeCh chan struct{}
wg *sync.WaitGroup
}
type TaskHandler interface {
Handle(t Task)
}
type Starter interface {
Start()
}
func (w *Worker) Start(handler TaskHandler) {
w.wg.Add(1)
go func() {
defer w.wg.Done()
if s, ok := handler.(Starter); ok {
s.Start()
}
for {
task := <-w.receiver
if _, ok := task.(TaskStop); ok {
return
}
handler.Handle(task)
}
}()
}
func (w *Worker) Sender() chan<- Task {
return w.sender
}
func (w *Worker) Stop() {
w.sender <- TaskStop{}
}
const defaultWorkerCapacity = 128
func NewWorker(name string, wg *sync.WaitGroup) *Worker {
ch := make(chan Task, defaultWorkerCapacity)
return &Worker{
sender: (chan<- Task)(ch),
receiver: (<-chan Task)(ch),
name: name,
wg: wg,
}
}
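
A short usage sketch (assumed, not from the commit) for the worker above: define a handler, start the worker, send it tasks, then stop it.

```go
package main

import (
	"fmt"
	"sync"

	"github.com/pingcap-incubator/tinykv/kv/util/worker"
)

type printHandler struct{}

func (printHandler) Handle(t worker.Task) { fmt.Println("handled:", t) }

func main() {
	var wg sync.WaitGroup
	w := worker.NewWorker("demo", &wg)
	w.Start(printHandler{})

	w.Sender() <- "hello" // any value satisfies the Task interface
	w.Stop()              // enqueues TaskStop; the goroutine exits after handling earlier tasks
	wg.Wait()
}
```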

268
log/log.go Normal file
View File

@ -0,0 +1,268 @@
// A high-level log wrapper that can output different logs based on level.
package log
import (
"fmt"
"io"
"log"
"os"
"runtime"
)
const (
Ldate = log.Ldate
Llongfile = log.Llongfile
Lmicroseconds = log.Lmicroseconds
Lshortfile = log.Lshortfile
LstdFlags = log.LstdFlags
Ltime = log.Ltime
)
type (
LogLevel int
LogType int
)
const (
LOG_FATAL = LogType(0x1)
LOG_ERROR = LogType(0x2)
LOG_WARNING = LogType(0x4)
LOG_INFO = LogType(0x8)
LOG_DEBUG = LogType(0x10)
)
const (
LOG_LEVEL_NONE = LogLevel(0x0)
LOG_LEVEL_FATAL = LOG_LEVEL_NONE | LogLevel(LOG_FATAL)
LOG_LEVEL_ERROR = LOG_LEVEL_FATAL | LogLevel(LOG_ERROR)
LOG_LEVEL_WARN = LOG_LEVEL_ERROR | LogLevel(LOG_WARNING)
LOG_LEVEL_INFO = LOG_LEVEL_WARN | LogLevel(LOG_INFO)
LOG_LEVEL_DEBUG = LOG_LEVEL_INFO | LogLevel(LOG_DEBUG)
LOG_LEVEL_ALL = LOG_LEVEL_DEBUG
)
const FORMAT_TIME_DAY string = "20060102"
const FORMAT_TIME_HOUR string = "2006010215"
var _log *Logger = New()
func init() {
SetFlags(Ldate | Ltime | Lshortfile)
SetHighlighting(runtime.GOOS != "windows")
}
func GlobalLogger() *log.Logger {
return _log._log
}
func SetLevel(level LogLevel) {
_log.SetLevel(level)
}
func GetLogLevel() LogLevel {
return _log.level
}
func SetFlags(flags int) {
_log._log.SetFlags(flags)
}
func Info(v ...interface{}) {
_log.Info(v...)
}
func Infof(format string, v ...interface{}) {
_log.Infof(format, v...)
}
func Panic(v ...interface{}) {
_log.Panic(v...)
}
func Panicf(format string, v ...interface{}) {
_log.Panicf(format, v...)
}
func Debug(v ...interface{}) {
_log.Debug(v...)
}
func Debugf(format string, v ...interface{}) {
_log.Debugf(format, v...)
}
func Warn(v ...interface{}) {
_log.Warning(v...)
}
func Warnf(format string, v ...interface{}) {
_log.Warningf(format, v...)
}
func Warning(v ...interface{}) {
_log.Warning(v...)
}
func Warningf(format string, v ...interface{}) {
_log.Warningf(format, v...)
}
func Error(v ...interface{}) {
_log.Error(v...)
}
func Errorf(format string, v ...interface{}) {
_log.Errorf(format, v...)
}
func Fatal(v ...interface{}) {
_log.Fatal(v...)
}
func Fatalf(format string, v ...interface{}) {
_log.Fatalf(format, v...)
}
func SetLevelByString(level string) {
_log.SetLevelByString(level)
}
func SetHighlighting(highlighting bool) {
_log.SetHighlighting(highlighting)
}
type Logger struct {
_log *log.Logger
level LogLevel
highlighting bool
}
func (l *Logger) SetHighlighting(highlighting bool) {
l.highlighting = highlighting
}
func (l *Logger) SetFlags(flags int) {
l._log.SetFlags(flags)
}
func (l *Logger) Flags() int {
return l._log.Flags()
}
func (l *Logger) SetLevel(level LogLevel) {
l.level = level
}
func (l *Logger) SetLevelByString(level string) {
l.level = StringToLogLevel(level)
}
func (l *Logger) log(t LogType, v ...interface{}) {
l.logf(t, "%v\n", v)
}
func (l *Logger) logf(t LogType, format string, v ...interface{}) {
if l.level|LogLevel(t) != l.level {
return
}
logStr, logColor := LogTypeToString(t)
var s string
if l.highlighting {
s = "\033" + logColor + "m[" + logStr + "] " + fmt.Sprintf(format, v...) + "\033[0m"
} else {
s = "[" + logStr + "] " + fmt.Sprintf(format, v...)
}
l._log.Output(4, s)
}
func (l *Logger) Fatal(v ...interface{}) {
l.log(LOG_FATAL, v...)
os.Exit(-1)
}
func (l *Logger) Fatalf(format string, v ...interface{}) {
l.logf(LOG_FATAL, format, v...)
os.Exit(-1)
}
func (l *Logger) Panic(v ...interface{}) {
l._log.Panic(v...)
}
func (l *Logger) Panicf(format string, v ...interface{}) {
l._log.Panicf(format, v...)
}
func (l *Logger) Error(v ...interface{}) {
l.log(LOG_ERROR, v...)
}
func (l *Logger) Errorf(format string, v ...interface{}) {
l.logf(LOG_ERROR, format, v...)
}
func (l *Logger) Warning(v ...interface{}) {
l.log(LOG_WARNING, v...)
}
func (l *Logger) Warningf(format string, v ...interface{}) {
l.logf(LOG_WARNING, format, v...)
}
func (l *Logger) Debug(v ...interface{}) {
l.log(LOG_DEBUG, v...)
}
func (l *Logger) Debugf(format string, v ...interface{}) {
l.logf(LOG_DEBUG, format, v...)
}
func (l *Logger) Info(v ...interface{}) {
l.log(LOG_INFO, v...)
}
func (l *Logger) Infof(format string, v ...interface{}) {
l.logf(LOG_INFO, format, v...)
}
func StringToLogLevel(level string) LogLevel {
switch level {
case "fatal":
return LOG_LEVEL_FATAL
case "error":
return LOG_LEVEL_ERROR
case "warn":
return LOG_LEVEL_WARN
case "warning":
return LOG_LEVEL_WARN
case "debug":
return LOG_LEVEL_DEBUG
case "info":
return LOG_LEVEL_INFO
}
return LOG_LEVEL_ALL
}
func LogTypeToString(t LogType) (string, string) {
switch t {
case LOG_FATAL:
return "fatal", "[0;31"
case LOG_ERROR:
return "error", "[0;31"
case LOG_WARNING:
return "warning", "[0;33"
case LOG_DEBUG:
return "debug", "[0;36"
case LOG_INFO:
return "info", "[0;37"
}
return "unknown", "[0;37"
}
func New() *Logger {
return NewLogger(os.Stderr, "")
}
func NewLogger(w io.Writer, prefix string) *Logger {
return &Logger{_log: log.New(w, prefix, LstdFlags), level: LOG_LEVEL_ALL, highlighting: true}
}

73
proto/generate_go.sh Executable file
View File

@ -0,0 +1,73 @@
#!/usr/bin/env bash
# This script generates Go representations of Protobuf protocols. It will generate Go code in the pkg subdirectory
# for every protocol in the proto subdirectory. It uses protoc, the protobuf compiler, which must be installed.
set -ex
push () {
pushd $1 >/dev/null 2>&1
}
pop () {
popd $1 >/dev/null 2>&1
}
cmd_exists () {
which "$1" 1>/dev/null 2>&1
}
PROGRAM=$(basename "$0")
if [ -z $GOPATH ]; then
printf "Error: the environment variable GOPATH is not set, please set it before running %s\n" $PROGRAM > /dev/stderr
exit 1
fi
GO_PREFIX_PATH=github.com/pingcap-incubator/tinykv/proto/pkg
export PATH=$(pwd)/_tools/bin:$GOPATH/bin:$PATH
echo "install tools..."
GO111MODULE=off go get github.com/twitchtv/retool
# Ensure we're using the right versions of our tools (see tools.json).
GO111MODULE=off retool -base-dir=$(pwd) sync || exit 1
function collect() {
file=$(basename $1)
base_name=$(basename $file ".proto")
mkdir -p ../pkg/$base_name
if [ -z $GO_OUT_M ]; then
GO_OUT_M="M$file=$GO_PREFIX_PATH/$base_name"
else
GO_OUT_M="$GO_OUT_M,M$file=$GO_PREFIX_PATH/$base_name"
fi
}
cd proto
for file in `ls *.proto`
do
collect $file
done
echo "generate go code..."
ret=0
function gen() {
base_name=$(basename $1 ".proto")
protoc -I.:../include --gofast_out=plugins=grpc,$GO_OUT_M:../pkg/$base_name $1 || ret=$?
cd ../pkg/$base_name
sed -i.bak -E 's/import _ \"gogoproto\"//g' *.pb.go
sed -i.bak -E 's/import fmt \"fmt\"//g' *.pb.go
sed -i.bak -E 's/import io \"io\"//g' *.pb.go
sed -i.bak -E 's/import math \"math\"//g' *.pb.go
sed -i.bak -E 's/import _ \".*rustproto\"//' *.pb.go
rm -f *.bak
goimports -w *.pb.go
cd ../../proto
}
for file in `ls *.proto`
do
gen $file
done
exit $ret

View File

@ -0,0 +1,136 @@
// Protocol Buffers for Go with Gadgets
//
// Copyright (c) 2013, The GoGo Authors. All rights reserved.
// http://github.com/gogo/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto2";
package gogoproto;
import "google/protobuf/descriptor.proto";
option java_package = "com.google.protobuf";
option java_outer_classname = "GoGoProtos";
option go_package = "github.com/gogo/protobuf/gogoproto";
extend google.protobuf.EnumOptions {
optional bool goproto_enum_prefix = 62001;
optional bool goproto_enum_stringer = 62021;
optional bool enum_stringer = 62022;
optional string enum_customname = 62023;
optional bool enumdecl = 62024;
}
extend google.protobuf.EnumValueOptions {
optional string enumvalue_customname = 66001;
}
extend google.protobuf.FileOptions {
optional bool goproto_getters_all = 63001;
optional bool goproto_enum_prefix_all = 63002;
optional bool goproto_stringer_all = 63003;
optional bool verbose_equal_all = 63004;
optional bool face_all = 63005;
optional bool gostring_all = 63006;
optional bool populate_all = 63007;
optional bool stringer_all = 63008;
optional bool onlyone_all = 63009;
optional bool equal_all = 63013;
optional bool description_all = 63014;
optional bool testgen_all = 63015;
optional bool benchgen_all = 63016;
optional bool marshaler_all = 63017;
optional bool unmarshaler_all = 63018;
optional bool stable_marshaler_all = 63019;
optional bool sizer_all = 63020;
optional bool goproto_enum_stringer_all = 63021;
optional bool enum_stringer_all = 63022;
optional bool unsafe_marshaler_all = 63023;
optional bool unsafe_unmarshaler_all = 63024;
optional bool goproto_extensions_map_all = 63025;
optional bool goproto_unrecognized_all = 63026;
optional bool gogoproto_import = 63027;
optional bool protosizer_all = 63028;
optional bool compare_all = 63029;
optional bool typedecl_all = 63030;
optional bool enumdecl_all = 63031;
optional bool goproto_registration = 63032;
optional bool messagename_all = 63033;
}
extend google.protobuf.MessageOptions {
optional bool goproto_getters = 64001;
optional bool goproto_stringer = 64003;
optional bool verbose_equal = 64004;
optional bool face = 64005;
optional bool gostring = 64006;
optional bool populate = 64007;
optional bool stringer = 67008;
optional bool onlyone = 64009;
optional bool equal = 64013;
optional bool description = 64014;
optional bool testgen = 64015;
optional bool benchgen = 64016;
optional bool marshaler = 64017;
optional bool unmarshaler = 64018;
optional bool stable_marshaler = 64019;
optional bool sizer = 64020;
optional bool unsafe_marshaler = 64023;
optional bool unsafe_unmarshaler = 64024;
optional bool goproto_extensions_map = 64025;
optional bool goproto_unrecognized = 64026;
optional bool protosizer = 64028;
optional bool compare = 64029;
optional bool typedecl = 64030;
optional bool messagename = 64033;
}
extend google.protobuf.FieldOptions {
optional bool nullable = 65001;
optional bool embed = 65002;
optional string customtype = 65003;
optional string customname = 65004;
optional string jsontag = 65005;
optional string moretags = 65006;
optional string casttype = 65007;
optional string castkey = 65008;
optional string castvalue = 65009;
optional bool stdtime = 65010;
optional bool stdduration = 65011;
}

View File

@ -0,0 +1,154 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option go_package = "types";
option java_package = "com.google.protobuf";
option java_outer_classname = "AnyProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
// `Any` contains an arbitrary serialized protocol buffer message along with a
// URL that describes the type of the serialized message.
//
// Protobuf library provides support to pack/unpack Any values in the form
// of utility functions or additional generated methods of the Any type.
//
// Example 1: Pack and unpack a message in C++.
//
// Foo foo = ...;
// Any any;
// any.PackFrom(foo);
// ...
// if (any.UnpackTo(&foo)) {
// ...
// }
//
// Example 2: Pack and unpack a message in Java.
//
// Foo foo = ...;
// Any any = Any.pack(foo);
// ...
// if (any.is(Foo.class)) {
// foo = any.unpack(Foo.class);
// }
//
// Example 3: Pack and unpack a message in Python.
//
// foo = Foo(...)
// any = Any()
// any.Pack(foo)
// ...
// if any.Is(Foo.DESCRIPTOR):
// any.Unpack(foo)
// ...
//
// Example 4: Pack and unpack a message in Go
//
// foo := &pb.Foo{...}
// any, err := ptypes.MarshalAny(foo)
// ...
// foo := &pb.Foo{}
// if err := ptypes.UnmarshalAny(any, foo); err != nil {
// ...
// }
//
// The pack methods provided by protobuf library will by default use
// 'type.googleapis.com/full.type.name' as the type URL and the unpack
// methods only use the fully qualified type name after the last '/'
// in the type URL, for example "foo.bar.com/x/y.z" will yield type
// name "y.z".
//
//
// JSON
// ====
// The JSON representation of an `Any` value uses the regular
// representation of the deserialized, embedded message, with an
// additional field `@type` which contains the type URL. Example:
//
// package google.profile;
// message Person {
// string first_name = 1;
// string last_name = 2;
// }
//
// {
// "@type": "type.googleapis.com/google.profile.Person",
// "firstName": <string>,
// "lastName": <string>
// }
//
// If the embedded message type is well-known and has a custom JSON
// representation, that representation will be embedded adding a field
// `value` which holds the custom JSON in addition to the `@type`
// field. Example (for message [google.protobuf.Duration][]):
//
// {
// "@type": "type.googleapis.com/google.protobuf.Duration",
// "value": "1.212s"
// }
//
message Any {
// A URL/resource name that uniquely identifies the type of the serialized
// protocol buffer message. The last segment of the URL's path must represent
// the fully qualified name of the type (as in
// `path/google.protobuf.Duration`). The name should be in a canonical form
// (e.g., leading "." is not accepted).
//
// In practice, teams usually precompile into the binary all types that they
// expect it to use in the context of Any. However, for URLs which use the
// scheme `http`, `https`, or no scheme, one can optionally set up a type
// server that maps type URLs to message definitions as follows:
//
// * If no scheme is provided, `https` is assumed.
// * An HTTP GET on the URL must yield a [google.protobuf.Type][]
// value in binary format, or produce an error.
// * Applications are allowed to cache lookup results based on the
// URL, or have them precompiled into a binary to avoid any
// lookup. Therefore, binary compatibility needs to be preserved
// on changes to types. (Use versioned type names to manage
// breaking changes.)
//
// Note: this functionality is not currently available in the official
// protobuf release, and it is not used for type URLs beginning with
// type.googleapis.com.
//
// Schemes other than `http`, `https` (or the empty scheme) might be
// used with implementation specific semantics.
//
string type_url = 1;
// Must be a valid serialized protocol buffer of the above specified type.
bytes value = 2;
}
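
The "last segment after `/`" rule described above can be shown with a short Go sketch; `typeNameFromURL` is a hypothetical helper written only for illustration, not part of any protobuf package.

```
package main

import (
	"fmt"
	"strings"
)

// typeNameFromURL extracts the fully qualified message name from an Any type
// URL: only the part after the last '/' matters, so "foo.bar.com/x/y.z"
// yields "y.z".
func typeNameFromURL(typeURL string) string {
	if i := strings.LastIndex(typeURL, "/"); i >= 0 {
		return typeURL[i+1:]
	}
	return typeURL
}

func main() {
	fmt.Println(typeNameFromURL("type.googleapis.com/google.profile.Person")) // google.profile.Person
}
```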

View File

@ -0,0 +1,210 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
import "google/protobuf/source_context.proto";
import "google/protobuf/type.proto";
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option java_package = "com.google.protobuf";
option java_outer_classname = "ApiProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
option go_package = "types";
// Api is a light-weight descriptor for an API Interface.
//
// Interfaces are also described as "protocol buffer services" in some contexts,
// such as by the "service" keyword in a .proto file, but they are different
// from API Services, which represent a concrete implementation of an interface
// as opposed to simply a description of methods and bindings. They are also
// sometimes simply referred to as "APIs" in other contexts, such as the name of
// this message itself. See https://cloud.google.com/apis/design/glossary for
// detailed terminology.
message Api {
// The fully qualified name of this interface, including package name
// followed by the interface's simple name.
string name = 1;
// The methods of this interface, in unspecified order.
repeated Method methods = 2;
// Any metadata attached to the interface.
repeated Option options = 3;
// A version string for this interface. If specified, must have the form
// `major-version.minor-version`, as in `1.10`. If the minor version is
// omitted, it defaults to zero. If the entire version field is empty, the
// major version is derived from the package name, as outlined below. If the
// field is not empty, the version in the package name will be verified to be
// consistent with what is provided here.
//
// The versioning schema uses [semantic
// versioning](http://semver.org) where the major version number
// indicates a breaking change and the minor version an additive,
// non-breaking change. Both version numbers are signals to users
// what to expect from different versions, and should be carefully
// chosen based on the product plan.
//
// The major version is also reflected in the package name of the
// interface, which must end in `v<major-version>`, as in
// `google.feature.v1`. For major versions 0 and 1, the suffix can
// be omitted. Zero major versions must only be used for
// experimental, non-GA interfaces.
//
//
string version = 4;
// Source context for the protocol buffer service represented by this
// message.
SourceContext source_context = 5;
// Included interfaces. See [Mixin][].
repeated Mixin mixins = 6;
// The source syntax of the service.
Syntax syntax = 7;
}
// Method represents a method of an API interface.
message Method {
// The simple name of this method.
string name = 1;
// A URL of the input message type.
string request_type_url = 2;
// If true, the request is streamed.
bool request_streaming = 3;
// The URL of the output message type.
string response_type_url = 4;
// If true, the response is streamed.
bool response_streaming = 5;
// Any metadata attached to the method.
repeated Option options = 6;
// The source syntax of this method.
Syntax syntax = 7;
}
// Declares an API Interface to be included in this interface. The including
// interface must redeclare all the methods from the included interface, but
// documentation and options are inherited as follows:
//
// - If after comment and whitespace stripping, the documentation
// string of the redeclared method is empty, it will be inherited
// from the original method.
//
// - Each annotation belonging to the service config (http,
// visibility) which is not set in the redeclared method will be
// inherited.
//
// - If an http annotation is inherited, the path pattern will be
// modified as follows. Any version prefix will be replaced by the
// version of the including interface plus the [root][] path if
// specified.
//
// Example of a simple mixin:
//
// package google.acl.v1;
// service AccessControl {
// // Get the underlying ACL object.
// rpc GetAcl(GetAclRequest) returns (Acl) {
// option (google.api.http).get = "/v1/{resource=**}:getAcl";
// }
// }
//
// package google.storage.v2;
// service Storage {
// rpc GetAcl(GetAclRequest) returns (Acl);
//
// // Get a data record.
// rpc GetData(GetDataRequest) returns (Data) {
// option (google.api.http).get = "/v2/{resource=**}";
// }
// }
//
// Example of a mixin configuration:
//
// apis:
// - name: google.storage.v2.Storage
// mixins:
// - name: google.acl.v1.AccessControl
//
// The mixin construct implies that all methods in `AccessControl` are
// also declared with same name and request/response types in
// `Storage`. A documentation generator or annotation processor will
// see the effective `Storage.GetAcl` method after inheriting
// documentation and annotations as follows:
//
// service Storage {
// // Get the underlying ACL object.
// rpc GetAcl(GetAclRequest) returns (Acl) {
// option (google.api.http).get = "/v2/{resource=**}:getAcl";
// }
// ...
// }
//
// Note how the version in the path pattern changed from `v1` to `v2`.
//
// If the `root` field in the mixin is specified, it should be a
// relative path under which inherited HTTP paths are placed. Example:
//
// apis:
// - name: google.storage.v2.Storage
// mixins:
// - name: google.acl.v1.AccessControl
// root: acls
//
// This implies the following inherited HTTP annotation:
//
// service Storage {
// // Get the underlying ACL object.
// rpc GetAcl(GetAclRequest) returns (Acl) {
// option (google.api.http).get = "/v2/acls/{resource=**}:getAcl";
// }
// ...
// }
message Mixin {
// The fully qualified name of the interface which is included.
string name = 1;
// If non-empty specifies a path under which inherited HTTP paths
// are rooted.
string root = 2;
}

View File

@ -0,0 +1,167 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Author: kenton@google.com (Kenton Varda)
//
// WARNING: The plugin interface is currently EXPERIMENTAL and is subject to
// change.
//
// protoc (aka the Protocol Compiler) can be extended via plugins. A plugin is
// just a program that reads a CodeGeneratorRequest from stdin and writes a
// CodeGeneratorResponse to stdout.
//
// Plugins written using C++ can use google/protobuf/compiler/plugin.h instead
// of dealing with the raw protocol defined here.
//
// A plugin executable needs only to be placed somewhere in the path. The
// plugin should be named "protoc-gen-$NAME", and will then be used when the
// flag "--${NAME}_out" is passed to protoc.
syntax = "proto2";
package google.protobuf.compiler;
option java_package = "com.google.protobuf.compiler";
option java_outer_classname = "PluginProtos";
option go_package = "plugin_go";
import "google/protobuf/descriptor.proto";
// The version number of protocol compiler.
message Version {
optional int32 major = 1;
optional int32 minor = 2;
optional int32 patch = 3;
// A suffix for alpha, beta or rc release, e.g., "alpha-1", "rc2". It should
// be empty for mainline stable releases.
optional string suffix = 4;
}
// An encoded CodeGeneratorRequest is written to the plugin's stdin.
message CodeGeneratorRequest {
// The .proto files that were explicitly listed on the command-line. The
// code generator should generate code only for these files. Each file's
// descriptor will be included in proto_file, below.
repeated string file_to_generate = 1;
// The generator parameter passed on the command-line.
optional string parameter = 2;
// FileDescriptorProtos for all files in file_to_generate and everything
// they import. The files will appear in topological order, so each file
// appears before any file that imports it.
//
// protoc guarantees that all proto_files will be written after
// the fields above, even though this is not technically guaranteed by the
// protobuf wire format. This theoretically could allow a plugin to stream
// in the FileDescriptorProtos and handle them one by one rather than read
// the entire set into memory at once. However, as of this writing, this
// is not similarly optimized on protoc's end -- it will store all fields in
// memory at once before sending them to the plugin.
//
// Type names of fields and extensions in the FileDescriptorProto are always
// fully qualified.
repeated FileDescriptorProto proto_file = 15;
// The version number of protocol compiler.
optional Version compiler_version = 3;
}
// The plugin writes an encoded CodeGeneratorResponse to stdout.
message CodeGeneratorResponse {
// Error message. If non-empty, code generation failed. The plugin process
// should exit with status code zero even if it reports an error in this way.
//
// This should be used to indicate errors in .proto files which prevent the
// code generator from generating correct code. Errors which indicate a
// problem in protoc itself -- such as the input CodeGeneratorRequest being
// unparseable -- should be reported by writing a message to stderr and
// exiting with a non-zero status code.
optional string error = 1;
// Represents a single generated file.
message File {
// The file name, relative to the output directory. The name must not
// contain "." or ".." components and must be relative, not be absolute (so,
// the file cannot lie outside the output directory). "/" must be used as
// the path separator, not "\".
//
// If the name is omitted, the content will be appended to the previous
// file. This allows the generator to break large files into small chunks,
// and allows the generated text to be streamed back to protoc so that large
// files need not reside completely in memory at one time. Note that as of
// this writing protoc does not optimize for this -- it will read the entire
// CodeGeneratorResponse before writing files to disk.
optional string name = 1;
// If non-empty, indicates that the named file should already exist, and the
// content here is to be inserted into that file at a defined insertion
// point. This feature allows a code generator to extend the output
// produced by another code generator. The original generator may provide
// insertion points by placing special annotations in the file that look
// like:
// @@protoc_insertion_point(NAME)
// The annotation can have arbitrary text before and after it on the line,
// which allows it to be placed in a comment. NAME should be replaced with
// an identifier naming the point -- this is what other generators will use
// as the insertion_point. Code inserted at this point will be placed
// immediately above the line containing the insertion point (thus multiple
// insertions to the same point will come out in the order they were added).
// The double-@ is intended to make it unlikely that the generated code
// could contain things that look like insertion points by accident.
//
// For example, the C++ code generator places the following line in the
// .pb.h files that it generates:
// // @@protoc_insertion_point(namespace_scope)
// This line appears within the scope of the file's package namespace, but
// outside of any particular class. Another plugin can then specify the
// insertion_point "namespace_scope" to generate additional classes or
// other declarations that should be placed in this scope.
//
// Note that if the line containing the insertion point begins with
// whitespace, the same whitespace will be added to every line of the
// inserted text. This is useful for languages like Python, where
// indentation matters. In these languages, the insertion point comment
// should be indented the same amount as any inserted code will need to be
// in order to work correctly in that context.
//
// The code generator that generates the initial file and the one which
// inserts into it must both run as part of a single invocation of protoc.
// Code generators are executed in the order in which they appear on the
// command line.
//
// If |insertion_point| is present, |name| must also be present.
optional string insertion_point = 2;
// The file contents.
optional string content = 15;
}
repeated File file = 15;
}
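
The stdin/stdout protocol described above is small enough to sketch in Go. This is only an illustrative sketch: the import paths (`google.golang.org/protobuf/...` with its `pluginpb` bindings) and the generated `.echo.txt` output are assumptions, not part of this file.

```
package main

import (
	"io/ioutil"
	"os"

	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/pluginpb"
)

func main() {
	// protoc writes an encoded CodeGeneratorRequest to the plugin's stdin.
	in, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		panic(err)
	}
	req := &pluginpb.CodeGeneratorRequest{}
	if err := proto.Unmarshal(in, req); err != nil {
		panic(err)
	}

	// Emit one (purely illustrative) file per .proto file listed on the
	// command line; a real generator would inspect req.GetProtoFile().
	resp := &pluginpb.CodeGeneratorResponse{}
	for _, name := range req.GetFileToGenerate() {
		resp.File = append(resp.File, &pluginpb.CodeGeneratorResponse_File{
			Name:    proto.String(name + ".echo.txt"),
			Content: proto.String("generated from " + name + "\n"),
		})
	}

	// The plugin writes an encoded CodeGeneratorResponse to stdout.
	out, err := proto.Marshal(resp)
	if err != nil {
		panic(err)
	}
	os.Stdout.Write(out)
}
```

Installed as `protoc-gen-echo` somewhere on PATH, such a plugin would be invoked via `protoc --echo_out=. foo.proto`, matching the `protoc-gen-$NAME` / `--${NAME}_out` convention described above.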

View File

@ -0,0 +1,872 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Author: kenton@google.com (Kenton Varda)
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
//
// The messages in this file describe the definitions found in .proto files.
// A valid .proto file can be translated directly to a FileDescriptorProto
// without any other information (e.g. without reading its imports).
syntax = "proto2";
package google.protobuf;
option go_package = "descriptor";
option java_package = "com.google.protobuf";
option java_outer_classname = "DescriptorProtos";
option csharp_namespace = "Google.Protobuf.Reflection";
option objc_class_prefix = "GPB";
option cc_enable_arenas = true;
// descriptor.proto must be optimized for speed because reflection-based
// algorithms don't work during bootstrapping.
option optimize_for = SPEED;
// The protocol compiler can output a FileDescriptorSet containing the .proto
// files it parses.
message FileDescriptorSet {
repeated FileDescriptorProto file = 1;
}
// Describes a complete .proto file.
message FileDescriptorProto {
optional string name = 1; // file name, relative to root of source tree
optional string package = 2; // e.g. "foo", "foo.bar", etc.
// Names of files imported by this file.
repeated string dependency = 3;
// Indexes of the public imported files in the dependency list above.
repeated int32 public_dependency = 10;
// Indexes of the weak imported files in the dependency list.
// For Google-internal migration only. Do not use.
repeated int32 weak_dependency = 11;
// All top-level definitions in this file.
repeated DescriptorProto message_type = 4;
repeated EnumDescriptorProto enum_type = 5;
repeated ServiceDescriptorProto service = 6;
repeated FieldDescriptorProto extension = 7;
optional FileOptions options = 8;
// This field contains optional information about the original source code.
// You may safely remove this entire field without harming runtime
// functionality of the descriptors -- the information is needed only by
// development tools.
optional SourceCodeInfo source_code_info = 9;
// The syntax of the proto file.
// The supported values are "proto2" and "proto3".
optional string syntax = 12;
}
// Describes a message type.
message DescriptorProto {
optional string name = 1;
repeated FieldDescriptorProto field = 2;
repeated FieldDescriptorProto extension = 6;
repeated DescriptorProto nested_type = 3;
repeated EnumDescriptorProto enum_type = 4;
message ExtensionRange {
optional int32 start = 1;
optional int32 end = 2;
optional ExtensionRangeOptions options = 3;
}
repeated ExtensionRange extension_range = 5;
repeated OneofDescriptorProto oneof_decl = 8;
optional MessageOptions options = 7;
// Range of reserved tag numbers. Reserved tag numbers may not be used by
// fields or extension ranges in the same message. Reserved ranges may
// not overlap.
message ReservedRange {
optional int32 start = 1; // Inclusive.
optional int32 end = 2; // Exclusive.
}
repeated ReservedRange reserved_range = 9;
// Reserved field names, which may not be used by fields in the same message.
// A given name may only be reserved once.
repeated string reserved_name = 10;
}
message ExtensionRangeOptions {
// The parser stores options it doesn't recognize here. See above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message. See above.
extensions 1000 to max;
}
// Describes a field within a message.
message FieldDescriptorProto {
enum Type {
// 0 is reserved for errors.
// Order is weird for historical reasons.
TYPE_DOUBLE = 1;
TYPE_FLOAT = 2;
// Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT64 if
// negative values are likely.
TYPE_INT64 = 3;
TYPE_UINT64 = 4;
// Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT32 if
// negative values are likely.
TYPE_INT32 = 5;
TYPE_FIXED64 = 6;
TYPE_FIXED32 = 7;
TYPE_BOOL = 8;
TYPE_STRING = 9;
// Tag-delimited aggregate.
// Group type is deprecated and not supported in proto3. However, Proto3
// implementations should still be able to parse the group wire format and
// treat group fields as unknown fields.
TYPE_GROUP = 10;
TYPE_MESSAGE = 11; // Length-delimited aggregate.
// New in version 2.
TYPE_BYTES = 12;
TYPE_UINT32 = 13;
TYPE_ENUM = 14;
TYPE_SFIXED32 = 15;
TYPE_SFIXED64 = 16;
TYPE_SINT32 = 17; // Uses ZigZag encoding.
TYPE_SINT64 = 18; // Uses ZigZag encoding.
};
enum Label {
// 0 is reserved for errors
LABEL_OPTIONAL = 1;
LABEL_REQUIRED = 2;
LABEL_REPEATED = 3;
};
optional string name = 1;
optional int32 number = 3;
optional Label label = 4;
// If type_name is set, this need not be set. If both this and type_name
// are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP.
optional Type type = 5;
// For message and enum types, this is the name of the type. If the name
// starts with a '.', it is fully-qualified. Otherwise, C++-like scoping
// rules are used to find the type (i.e. first the nested types within this
// message are searched, then within the parent, on up to the root
// namespace).
optional string type_name = 6;
// For extensions, this is the name of the type being extended. It is
// resolved in the same manner as type_name.
optional string extendee = 2;
// For numeric types, contains the original text representation of the value.
// For booleans, "true" or "false".
// For strings, contains the default text contents (not escaped in any way).
// For bytes, contains the C escaped value. All bytes >= 128 are escaped.
// TODO(kenton): Base-64 encode?
optional string default_value = 7;
// If set, gives the index of a oneof in the containing type's oneof_decl
// list. This field is a member of that oneof.
optional int32 oneof_index = 9;
// JSON name of this field. The value is set by protocol compiler. If the
// user has set a "json_name" option on this field, that option's value
// will be used. Otherwise, it's deduced from the field's name by converting
// it to camelCase.
optional string json_name = 10;
optional FieldOptions options = 8;
}
// Describes a oneof.
message OneofDescriptorProto {
optional string name = 1;
optional OneofOptions options = 2;
}
// Describes an enum type.
message EnumDescriptorProto {
optional string name = 1;
repeated EnumValueDescriptorProto value = 2;
optional EnumOptions options = 3;
// Range of reserved numeric values. Reserved values may not be used by
// entries in the same enum. Reserved ranges may not overlap.
//
// Note that this is distinct from DescriptorProto.ReservedRange in that it
// is inclusive such that it can appropriately represent the entire int32
// domain.
message EnumReservedRange {
optional int32 start = 1; // Inclusive.
optional int32 end = 2; // Inclusive.
}
// Range of reserved numeric values. Reserved numeric values may not be used
// by enum values in the same enum declaration. Reserved ranges may not
// overlap.
repeated EnumReservedRange reserved_range = 4;
// Reserved enum value names, which may not be reused. A given name may only
// be reserved once.
repeated string reserved_name = 5;
}
// Describes a value within an enum.
message EnumValueDescriptorProto {
optional string name = 1;
optional int32 number = 2;
optional EnumValueOptions options = 3;
}
// Describes a service.
message ServiceDescriptorProto {
optional string name = 1;
repeated MethodDescriptorProto method = 2;
optional ServiceOptions options = 3;
}
// Describes a method of a service.
message MethodDescriptorProto {
optional string name = 1;
// Input and output type names. These are resolved in the same way as
// FieldDescriptorProto.type_name, but must refer to a message type.
optional string input_type = 2;
optional string output_type = 3;
optional MethodOptions options = 4;
// Identifies if client streams multiple client messages
optional bool client_streaming = 5 [default=false];
// Identifies if server streams multiple server messages
optional bool server_streaming = 6 [default=false];
}
// ===================================================================
// Options
// Each of the definitions above may have "options" attached. These are
// just annotations which may cause code to be generated slightly differently
// or may contain hints for code that manipulates protocol messages.
//
// Clients may define custom options as extensions of the *Options messages.
// These extensions may not yet be known at parsing time, so the parser cannot
// store the values in them. Instead it stores them in a field in the *Options
// message called uninterpreted_option. This field must have the same name
// across all *Options messages. We then use this field to populate the
// extensions when we build a descriptor, at which point all protos have been
// parsed and so all extensions are known.
//
// Extension numbers for custom options may be chosen as follows:
// * For options which will only be used within a single application or
// organization, or for experimental options, use field numbers 50000
// through 99999. It is up to you to ensure that you do not use the
// same number for multiple options.
// * For options which will be published and used publicly by multiple
// independent entities, e-mail protobuf-global-extension-registry@google.com
// to reserve extension numbers. Simply provide your project name (e.g.
// Objective-C plugin) and your project website (if available) -- there's no
// need to explain how you intend to use them. Usually you only need one
// extension number. You can declare multiple options with only one extension
// number by putting them in a sub-message. See the Custom Options section of
// the docs for examples:
// https://developers.google.com/protocol-buffers/docs/proto#options
// If this turns out to be popular, a web service will be set up
// to automatically assign option numbers.
message FileOptions {
// Sets the Java package where classes generated from this .proto will be
// placed. By default, the proto package is used, but this is often
// inappropriate because proto packages do not normally start with backwards
// domain names.
optional string java_package = 1;
// If set, all the classes from the .proto file are wrapped in a single
// outer class with the given name. This applies to both Proto1
// (equivalent to the old "--one_java_file" option) and Proto2 (where
// a .proto always translates to a single class, but you may want to
// explicitly choose the class name).
optional string java_outer_classname = 8;
// If set true, then the Java code generator will generate a separate .java
// file for each top-level message, enum, and service defined in the .proto
// file. Thus, these types will *not* be nested inside the outer class
// named by java_outer_classname. However, the outer class will still be
// generated to contain the file's getDescriptor() method as well as any
// top-level extensions defined in the file.
optional bool java_multiple_files = 10 [default=false];
// This option does nothing.
optional bool java_generate_equals_and_hash = 20 [deprecated=true];
// If set true, then the Java2 code generator will generate code that
// throws an exception whenever an attempt is made to assign a non-UTF-8
// byte sequence to a string field.
// Message reflection will do the same.
// However, an extension field still accepts non-UTF-8 byte sequences.
// This option has no effect when used with the lite runtime.
optional bool java_string_check_utf8 = 27 [default=false];
// Generated classes can be optimized for speed or code size.
enum OptimizeMode {
SPEED = 1; // Generate complete code for parsing, serialization,
// etc.
CODE_SIZE = 2; // Use ReflectionOps to implement these methods.
LITE_RUNTIME = 3; // Generate code using MessageLite and the lite runtime.
}
optional OptimizeMode optimize_for = 9 [default=SPEED];
// Sets the Go package where structs generated from this .proto will be
// placed. If omitted, the Go package will be derived from the following:
// - The basename of the package import path, if provided.
// - Otherwise, the package statement in the .proto file, if present.
// - Otherwise, the basename of the .proto file, without extension.
optional string go_package = 11;
// Should generic services be generated in each language? "Generic" services
// are not specific to any particular RPC system. They are generated by the
// main code generators in each language (without additional plugins).
// Generic services were the only kind of service generation supported by
// early versions of google.protobuf.
//
// Generic services are now considered deprecated in favor of using plugins
// that generate code specific to your particular RPC system. Therefore,
// these default to false. Old code which depends on generic services should
// explicitly set them to true.
optional bool cc_generic_services = 16 [default=false];
optional bool java_generic_services = 17 [default=false];
optional bool py_generic_services = 18 [default=false];
optional bool php_generic_services = 42 [default=false];
// Is this file deprecated?
// Depending on the target platform, this can emit Deprecated annotations
// for everything in the file, or it will be completely ignored; in the very
// least, this is a formalization for deprecating files.
optional bool deprecated = 23 [default=false];
// Enables the use of arenas for the proto messages in this file. This applies
// only to generated classes for C++.
optional bool cc_enable_arenas = 31 [default=false];
// Sets the objective c class prefix which is prepended to all objective c
// generated classes from this .proto. There is no default.
optional string objc_class_prefix = 36;
// Namespace for generated classes; defaults to the package.
optional string csharp_namespace = 37;
// By default Swift generators will take the proto package and CamelCase it
// replacing '.' with underscore and use that to prefix the types/symbols
// defined. When this option is provided, they will use this value instead
// to prefix the types/symbols defined.
optional string swift_prefix = 39;
// Sets the php class prefix which is prepended to all php generated classes
// from this .proto. Default is empty.
optional string php_class_prefix = 40;
// Use this option to change the namespace of php generated classes. Default
// is empty. When this option is empty, the package name will be used for
// determining the namespace.
optional string php_namespace = 41;
// The parser stores options it doesn't recognize here.
// See the documentation for the "Options" section above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message.
// See the documentation for the "Options" section above.
extensions 1000 to max;
//reserved 38;
}
message MessageOptions {
// Set true to use the old proto1 MessageSet wire format for extensions.
// This is provided for backwards-compatibility with the MessageSet wire
// format. You should not use this for any other reason: It's less
// efficient, has fewer features, and is more complicated.
//
// The message must be defined exactly as follows:
// message Foo {
// option message_set_wire_format = true;
// extensions 4 to max;
// }
// Note that the message cannot have any defined fields; MessageSets only
// have extensions.
//
// All extensions of your type must be singular messages; e.g. they cannot
// be int32s, enums, or repeated messages.
//
// Because this is an option, the above two restrictions are not enforced by
// the protocol compiler.
optional bool message_set_wire_format = 1 [default=false];
// Disables the generation of the standard "descriptor()" accessor, which can
// conflict with a field of the same name. This is meant to make migration
// from proto1 easier; new code should avoid fields named "descriptor".
optional bool no_standard_descriptor_accessor = 2 [default=false];
// Is this message deprecated?
// Depending on the target platform, this can emit Deprecated annotations
// for the message, or it will be completely ignored; in the very least,
// this is a formalization for deprecating messages.
optional bool deprecated = 3 [default=false];
// Whether the message is an automatically generated map entry type for the
// maps field.
//
// For maps fields:
// map<KeyType, ValueType> map_field = 1;
// The parsed descriptor looks like:
// message MapFieldEntry {
// option map_entry = true;
// optional KeyType key = 1;
// optional ValueType value = 2;
// }
// repeated MapFieldEntry map_field = 1;
//
// Implementations may choose not to generate the map_entry=true message, but
// use a native map in the target language to hold the keys and values.
// The reflection APIs in such implementations still need to work as
// if the field is a repeated message field.
//
// NOTE: Do not set the option in .proto files. Always use the maps syntax
// instead. The option should only be implicitly set by the proto compiler
// parser.
optional bool map_entry = 7;
//reserved 8; // javalite_serializable
//reserved 9; // javanano_as_lite
// The parser stores options it doesn't recognize here. See above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message. See above.
extensions 1000 to max;
}
message FieldOptions {
// The ctype option instructs the C++ code generator to use a different
// representation of the field than it normally would. See the specific
// options below. This option is not yet implemented in the open source
// release -- sorry, we'll try to include it in a future version!
optional CType ctype = 1 [default = STRING];
enum CType {
// Default mode.
STRING = 0;
CORD = 1;
STRING_PIECE = 2;
}
// The packed option can be enabled for repeated primitive fields to enable
// a more efficient representation on the wire. Rather than repeatedly
// writing the tag and type for each element, the entire array is encoded as
// a single length-delimited blob. In proto3, only explicitly setting it to
// false will avoid using packed encoding.
optional bool packed = 2;
// The jstype option determines the JavaScript type used for values of the
// field. The option is permitted only for 64 bit integral and fixed types
// (int64, uint64, sint64, fixed64, sfixed64). A field with jstype JS_STRING
// is represented as a JavaScript string, which avoids loss of precision that
// can happen when a large value is converted to a floating point JavaScript
// number.
// Specifying JS_NUMBER for the jstype causes the generated JavaScript code to
// use the JavaScript "number" type. The behavior of the default option
// JS_NORMAL is implementation dependent.
//
// This option is an enum to permit additional types to be added, e.g.
// goog.math.Integer.
optional JSType jstype = 6 [default = JS_NORMAL];
enum JSType {
// Use the default type.
JS_NORMAL = 0;
// Use JavaScript strings.
JS_STRING = 1;
// Use JavaScript numbers.
JS_NUMBER = 2;
}
// Should this field be parsed lazily? Lazy applies only to message-type
// fields. It means that when the outer message is initially parsed, the
// inner message's contents will not be parsed but instead stored in encoded
// form. The inner message will actually be parsed when it is first accessed.
//
// This is only a hint. Implementations are free to choose whether to use
// eager or lazy parsing regardless of the value of this option. However,
// setting this option true suggests that the protocol author believes that
// using lazy parsing on this field is worth the additional bookkeeping
// overhead typically needed to implement it.
//
// This option does not affect the public interface of any generated code;
// all method signatures remain the same. Furthermore, thread-safety of the
// interface is not affected by this option; const methods remain safe to
// call from multiple threads concurrently, while non-const methods continue
// to require exclusive access.
//
//
// Note that implementations may choose not to check required fields within
// a lazy sub-message. That is, calling IsInitialized() on the outer message
// may return true even if the inner message has missing required fields.
// This is necessary because otherwise the inner message would have to be
// parsed in order to perform the check, defeating the purpose of lazy
// parsing. An implementation which chooses not to check required fields
// must be consistent about it. That is, for any particular sub-message, the
// implementation must either *always* check its required fields, or *never*
// check its required fields, regardless of whether or not the message has
// been parsed.
optional bool lazy = 5 [default=false];
// Is this field deprecated?
// Depending on the target platform, this can emit Deprecated annotations
// for accessors, or it will be completely ignored; in the very least, this
// is a formalization for deprecating fields.
optional bool deprecated = 3 [default=false];
// For Google-internal migration only. Do not use.
optional bool weak = 10 [default=false];
// The parser stores options it doesn't recognize here. See above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message. See above.
extensions 1000 to max;
//reserved 4; // removed jtype
}
message OneofOptions {
// The parser stores options it doesn't recognize here. See above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message. See above.
extensions 1000 to max;
}
message EnumOptions {
// Set this option to true to allow mapping different tag names to the same
// value.
optional bool allow_alias = 2;
// Is this enum deprecated?
// Depending on the target platform, this can emit Deprecated annotations
// for the enum, or it will be completely ignored; in the very least, this
// is a formalization for deprecating enums.
optional bool deprecated = 3 [default=false];
//reserved 5; // javanano_as_lite
// The parser stores options it doesn't recognize here. See above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message. See above.
extensions 1000 to max;
}
message EnumValueOptions {
// Is this enum value deprecated?
// Depending on the target platform, this can emit Deprecated annotations
// for the enum value, or it will be completely ignored; in the very least,
// this is a formalization for deprecating enum values.
optional bool deprecated = 1 [default=false];
// The parser stores options it doesn't recognize here. See above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message. See above.
extensions 1000 to max;
}
message ServiceOptions {
// Note: Field numbers 1 through 32 are reserved for Google's internal RPC
// framework. We apologize for hoarding these numbers to ourselves, but
// we were already using them long before we decided to release Protocol
// Buffers.
// Is this service deprecated?
// Depending on the target platform, this can emit Deprecated annotations
// for the service, or it will be completely ignored; in the very least,
// this is a formalization for deprecating services.
optional bool deprecated = 33 [default=false];
// The parser stores options it doesn't recognize here. See above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message. See above.
extensions 1000 to max;
}
message MethodOptions {
// Note: Field numbers 1 through 32 are reserved for Google's internal RPC
// framework. We apologize for hoarding these numbers to ourselves, but
// we were already using them long before we decided to release Protocol
// Buffers.
// Is this method deprecated?
// Depending on the target platform, this can emit Deprecated annotations
// for the method, or it will be completely ignored; in the very least,
// this is a formalization for deprecating methods.
optional bool deprecated = 33 [default=false];
// Is this method side-effect-free (or safe in HTTP parlance), or idempotent,
// or neither? HTTP-based RPC implementations may choose the GET verb for safe
// methods, and the PUT verb for idempotent methods instead of the default POST.
enum IdempotencyLevel {
IDEMPOTENCY_UNKNOWN = 0;
NO_SIDE_EFFECTS = 1; // implies idempotent
IDEMPOTENT = 2; // idempotent, but may have side effects
}
optional IdempotencyLevel idempotency_level =
34 [default=IDEMPOTENCY_UNKNOWN];
// The parser stores options it doesn't recognize here. See above.
repeated UninterpretedOption uninterpreted_option = 999;
// Clients can define custom options in extensions of this message. See above.
extensions 1000 to max;
}
// A message representing an option the parser does not recognize. This only
// appears in options protos created by the compiler::Parser class.
// DescriptorPool resolves these when building Descriptor objects. Therefore,
// options protos in descriptor objects (e.g. returned by Descriptor::options(),
// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions
// in them.
message UninterpretedOption {
// The name of the uninterpreted option. Each string represents a segment in
// a dot-separated name. is_extension is true iff a segment represents an
// extension (denoted with parentheses in options specs in .proto files).
// E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents
// "foo.(bar.baz).qux".
message NamePart {
required string name_part = 1;
required bool is_extension = 2;
}
repeated NamePart name = 2;
// The value of the uninterpreted option, in whatever type the tokenizer
// identified it as during parsing. Exactly one of these should be set.
optional string identifier_value = 3;
optional uint64 positive_int_value = 4;
optional int64 negative_int_value = 5;
optional double double_value = 6;
optional bytes string_value = 7;
optional string aggregate_value = 8;
}
// ===================================================================
// Optional source code info
// Encapsulates information about the original source file from which a
// FileDescriptorProto was generated.
message SourceCodeInfo {
// A Location identifies a piece of source code in a .proto file which
// corresponds to a particular definition. This information is intended
// to be useful to IDEs, code indexers, documentation generators, and similar
// tools.
//
// For example, say we have a file like:
// message Foo {
// optional string foo = 1;
// }
// Let's look at just the field definition:
// optional string foo = 1;
// ^ ^^ ^^ ^ ^^^
// a bc de f ghi
// We have the following locations:
// span path represents
// [a,i) [ 4, 0, 2, 0 ] The whole field definition.
// [a,b) [ 4, 0, 2, 0, 4 ] The label (optional).
// [c,d) [ 4, 0, 2, 0, 5 ] The type (string).
// [e,f) [ 4, 0, 2, 0, 1 ] The name (foo).
// [g,h) [ 4, 0, 2, 0, 3 ] The number (1).
//
// Notes:
// - A location may refer to a repeated field itself (i.e. not to any
// particular index within it). This is used whenever a set of elements are
// logically enclosed in a single code segment. For example, an entire
// extend block (possibly containing multiple extension definitions) will
// have an outer location whose path refers to the "extensions" repeated
// field without an index.
// - Multiple locations may have the same path. This happens when a single
// logical declaration is spread out across multiple places. The most
// obvious example is the "extend" block again -- there may be multiple
// extend blocks in the same scope, each of which will have the same path.
// - A location's span is not always a subset of its parent's span. For
// example, the "extendee" of an extension declaration appears at the
// beginning of the "extend" block and is shared by all extensions within
// the block.
// - Just because a location's span is a subset of some other location's span
// does not mean that it is a descendant. For example, a "group" defines
// both a type and a field in a single declaration. Thus, the locations
// corresponding to the type and field and their components will overlap.
// - Code which tries to interpret locations should probably be designed to
// ignore those that it doesn't understand, as more types of locations could
// be recorded in the future.
repeated Location location = 1;
message Location {
// Identifies which part of the FileDescriptorProto was defined at this
// location.
//
// Each element is a field number or an index. They form a path from
// the root FileDescriptorProto to the place where the definition occurs. For
// example, this path:
// [ 4, 3, 2, 7, 1 ]
// refers to:
// file.message_type(3) // 4, 3
// .field(7) // 2, 7
// .name() // 1
// This is because FileDescriptorProto.message_type has field number 4:
// repeated DescriptorProto message_type = 4;
// and DescriptorProto.field has field number 2:
// repeated FieldDescriptorProto field = 2;
// and FieldDescriptorProto.name has field number 1:
// optional string name = 1;
//
// Thus, the above path gives the location of a field name. If we removed
// the last element:
// [ 4, 3, 2, 7 ]
// this path refers to the whole field declaration (from the beginning
// of the label to the terminating semicolon).
repeated int32 path = 1 [packed=true];
// Always has exactly three or four elements: start line, start column,
// end line (optional, otherwise assumed same as start line), end column.
// These are packed into a single field for efficiency. Note that line
// and column numbers are zero-based -- typically you will want to add
// 1 to each before displaying to a user.
repeated int32 span = 2 [packed=true];
// If this SourceCodeInfo represents a complete declaration, these are any
// comments appearing before and after the declaration which appear to be
// attached to the declaration.
//
// A series of line comments appearing on consecutive lines, with no other
// tokens appearing on those lines, will be treated as a single comment.
//
// leading_detached_comments will keep paragraphs of comments that appear
// before (but not connected to) the current element. Each paragraph,
// separated by empty lines, will be one comment element in the repeated
// field.
//
// Only the comment content is provided; comment markers (e.g. //) are
// stripped out. For block comments, leading whitespace and an asterisk
// will be stripped from the beginning of each line other than the first.
// Newlines are included in the output.
//
// Examples:
//
// optional int32 foo = 1; // Comment attached to foo.
// // Comment attached to bar.
// optional int32 bar = 2;
//
// optional string baz = 3;
// // Comment attached to baz.
// // Another line attached to baz.
//
// // Comment attached to qux.
// //
// // Another line attached to qux.
// optional double qux = 4;
//
// // Detached comment for corge. This is not leading or trailing comments
// // to qux or corge because there are blank lines separating it from
// // both.
//
// // Detached comment for corge paragraph 2.
//
// optional string corge = 5;
// /* Block comment attached
// * to corge. Leading asterisks
// * will be removed. */
// /* Block comment attached to
// * grault. */
// optional int32 grault = 6;
//
// // ignored detached comments.
optional string leading_comments = 3;
optional string trailing_comments = 4;
repeated string leading_detached_comments = 6;
}
}
// Describes the relationship between generated code and its original source
// file. A GeneratedCodeInfo message is associated with only one generated
// source file, but may contain references to different source .proto files.
message GeneratedCodeInfo {
// An Annotation connects some span of text in generated code to an element
// of its generating .proto file.
repeated Annotation annotation = 1;
message Annotation {
// Identifies the element in the original source .proto file. This field
// is formatted the same as SourceCodeInfo.Location.path.
repeated int32 path = 1 [packed=true];
// Identifies the filesystem path to the original source .proto.
optional string source_file = 2;
// Identifies the starting offset in bytes in the generated code
// that relates to the identified object.
optional int32 begin = 3;
// Identifies the ending offset in bytes in the generated code that
// relates to the identified offset. The end offset should be one past
// the last relevant byte (so the length of the text = end - begin).
optional int32 end = 4;
}
}
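
The path encoding used by `SourceCodeInfo.Location` (and reused by `GeneratedCodeInfo.Annotation`) can be illustrated with a small Go sketch. The `fieldNameAt` helper is hypothetical, and `descriptorpb` stands in for whichever generated descriptor bindings are actually in use.

```
package main

import (
	"fmt"

	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/descriptorpb"
)

// fieldNameAt follows a path of the shape [4, m, 2, f, 1] described above:
// FileDescriptorProto.message_type is field number 4, DescriptorProto.field
// is field number 2, and FieldDescriptorProto.name is field number 1, so the
// path identifies the name of field f inside message m.
func fieldNameAt(file *descriptorpb.FileDescriptorProto, m, f int) string {
	return file.GetMessageType()[m].GetField()[f].GetName()
}

func main() {
	file := &descriptorpb.FileDescriptorProto{
		MessageType: []*descriptorpb.DescriptorProto{{
			Name: proto.String("Foo"),
			Field: []*descriptorpb.FieldDescriptorProto{{
				Name:   proto.String("foo"),
				Number: proto.Int32(1),
			}},
		}},
	}
	fmt.Println(fieldNameAt(file, 0, 0)) // foo
}
```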

View File

@ -0,0 +1,117 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option cc_enable_arenas = true;
option go_package = "types";
option java_package = "com.google.protobuf";
option java_outer_classname = "DurationProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
// A Duration represents a signed, fixed-length span of time represented
// as a count of seconds and fractions of seconds at nanosecond
// resolution. It is independent of any calendar and concepts like "day"
// or "month". It is related to Timestamp in that the difference between
// two Timestamp values is a Duration and it can be added or subtracted
// from a Timestamp. Range is approximately +-10,000 years.
//
// # Examples
//
// Example 1: Compute Duration from two Timestamps in pseudo code.
//
// Timestamp start = ...;
// Timestamp end = ...;
// Duration duration = ...;
//
// duration.seconds = end.seconds - start.seconds;
// duration.nanos = end.nanos - start.nanos;
//
// if (duration.seconds < 0 && duration.nanos > 0) {
// duration.seconds += 1;
// duration.nanos -= 1000000000;
// } else if (duration.seconds > 0 && duration.nanos < 0) {
// duration.seconds -= 1;
// duration.nanos += 1000000000;
// }
//
// Example 2: Compute Timestamp from Timestamp + Duration in pseudo code.
//
// Timestamp start = ...;
// Duration duration = ...;
// Timestamp end = ...;
//
// end.seconds = start.seconds + duration.seconds;
// end.nanos = start.nanos + duration.nanos;
//
// if (end.nanos < 0) {
// end.seconds -= 1;
// end.nanos += 1000000000;
// } else if (end.nanos >= 1000000000) {
// end.seconds += 1;
// end.nanos -= 1000000000;
// }
//
// Example 3: Compute Duration from datetime.timedelta in Python.
//
// td = datetime.timedelta(days=3, minutes=10)
// duration = Duration()
// duration.FromTimedelta(td)
//
// # JSON Mapping
//
// In JSON format, the Duration type is encoded as a string rather than an
// object, where the string ends in the suffix "s" (indicating seconds) and
// is preceded by the number of seconds, with nanoseconds expressed as
// fractional seconds. For example, 3 seconds with 0 nanoseconds should be
// encoded in JSON format as "3s", while 3 seconds and 1 nanosecond should
// be expressed in JSON format as "3.000000001s", and 3 seconds and 1
// microsecond should be expressed in JSON format as "3.000001s".
//
//
message Duration {
// Signed seconds of the span of time. Must be from -315,576,000,000
// to +315,576,000,000 inclusive. Note: these bounds are computed from:
// 60 sec/min * 60 min/hr * 24 hr/day * 365.25 days/year * 10000 years
int64 seconds = 1;
// Signed fractions of a second at nanosecond resolution of the span
// of time. Durations less than one second are represented with a 0
// `seconds` field and a positive or negative `nanos` field. For durations
// of one second or more, a non-zero value for the `nanos` field must be
// of the same sign as the `seconds` field. Must be from -999,999,999
// to +999,999,999 inclusive.
int32 nanos = 2;
}
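
Example 1 above translates directly into Go. The `duration` struct below is only a stand-in for the generated message type, used to keep the sketch self-contained.

```
package main

import "fmt"

// duration mirrors the Duration message above, for illustration only.
type duration struct {
	Seconds int64
	Nanos   int32
}

// between computes end - start and normalizes the result so Seconds and
// Nanos never carry opposite signs, exactly as in Example 1 above.
func between(startSec int64, startNanos int32, endSec int64, endNanos int32) duration {
	d := duration{Seconds: endSec - startSec, Nanos: endNanos - startNanos}
	if d.Seconds < 0 && d.Nanos > 0 {
		d.Seconds++
		d.Nanos -= 1000000000
	} else if d.Seconds > 0 && d.Nanos < 0 {
		d.Seconds--
		d.Nanos += 1000000000
	}
	return d
}

func main() {
	// 12.1s - 10.9s = 1.2s
	fmt.Println(between(10, 900000000, 12, 100000000)) // {1 200000000}
}
```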

View File

@ -0,0 +1,52 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option go_package = "types";
option java_package = "com.google.protobuf";
option java_outer_classname = "EmptyProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
option cc_enable_arenas = true;
// A generic empty message that you can re-use to avoid defining duplicated
// empty messages in your APIs. A typical example is to use it as the request
// or the response type of an API method. For instance:
//
// service Foo {
// rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
// }
//
// The JSON representation for `Empty` is empty JSON object `{}`.
message Empty {}

View File

@ -0,0 +1,252 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option java_package = "com.google.protobuf";
option java_outer_classname = "FieldMaskProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
option go_package = "types";
// `FieldMask` represents a set of symbolic field paths, for example:
//
// paths: "f.a"
// paths: "f.b.d"
//
// Here `f` represents a field in some root message, `a` and `b`
// fields in the message found in `f`, and `d` a field found in the
// message in `f.b`.
//
// Field masks are used to specify a subset of fields that should be
// returned by a get operation or modified by an update operation.
// Field masks also have a custom JSON encoding (see below).
//
// # Field Masks in Projections
//
// When used in the context of a projection, a response message or
// sub-message is filtered by the API to only contain those fields as
// specified in the mask. For example, if the mask in the previous
// example is applied to a response message as follows:
//
// f {
// a : 22
// b {
// d : 1
// x : 2
// }
// y : 13
// }
// z: 8
//
// The result will not contain specific values for fields x, y, and z
// (their values will be set to the default and omitted in proto text
// output):
//
//
// f {
// a : 22
// b {
// d : 1
// }
// }
//
// A repeated field is not allowed except at the last position of a
// paths string.
//
// If a FieldMask object is not present in a get operation, the
// operation applies to all fields (as if a FieldMask of all fields
// had been specified).
//
// Note that a field mask does not necessarily apply to the
// top-level response message. In case of a REST get operation, the
// field mask applies directly to the response, but in case of a REST
// list operation, the mask instead applies to each individual message
// in the returned resource list. In case of a REST custom method,
// other definitions may be used. Where the mask applies will be
// clearly documented together with its declaration in the API. In
// any case, the effect on the returned resource/resources is required
// behavior for APIs.
//
// # Field Masks in Update Operations
//
// A field mask in update operations specifies which fields of the
// targeted resource are going to be updated. The API is required
// to only change the values of the fields as specified in the mask
// and leave the others untouched. If a resource is passed in to
// describe the updated values, the API ignores the values of all
// fields not covered by the mask.
//
// If a repeated field is specified for an update operation, the existing
// repeated values in the target resource will be overwritten by the new values.
// Note that a repeated field is only allowed in the last position of a `paths`
// string.
//
// If a sub-message is specified in the last position of the field mask for an
// update operation, then the existing sub-message in the target resource is
// overwritten. Given the target message:
//
// f {
// b {
// d : 1
// x : 2
// }
// c : 1
// }
//
// And an update message:
//
// f {
// b {
// d : 10
// }
// }
//
// then if the field mask is:
//
// paths: "f.b"
//
// then the result will be:
//
// f {
// b {
// d : 10
// }
// c : 1
// }
//
// However, if the update mask was:
//
// paths: "f.b.d"
//
// then the result would be:
//
// f {
// b {
// d : 10
// x : 2
// }
// c : 1
// }
//
// In order to reset a field's value to the default, the field must
// be in the mask and set to the default value in the provided resource.
// Hence, in order to reset all fields of a resource, provide a default
// instance of the resource and set all fields in the mask, or do
// not provide a mask as described below.
//
// If a field mask is not present on update, the operation applies to
// all fields (as if a field mask of all fields has been specified).
// Note that in the presence of schema evolution, this may mean that
// fields the client does not know and has therefore not filled into
// the request will be reset to their default. If this is unwanted
// behavior, a specific service may require a client to always specify
// a field mask, producing an error if not.
//
// As with get operations, the location of the resource which
// describes the updated values in the request message depends on the
// operation kind. In any case, the effect of the field mask is
// required to be honored by the API.
//
// ## Considerations for HTTP REST
//
// The HTTP kind of an update operation which uses a field mask must
// be set to PATCH instead of PUT in order to satisfy HTTP semantics
// (PUT must only be used for full updates).
//
// # JSON Encoding of Field Masks
//
// In JSON, a field mask is encoded as a single string where paths are
// separated by a comma. Field names in each path are converted
// to/from lower-camel naming conventions.
//
// As an example, consider the following message declarations:
//
// message Profile {
// User user = 1;
// Photo photo = 2;
// }
// message User {
// string display_name = 1;
// string address = 2;
// }
//
// In proto a field mask for `Profile` may look as such:
//
// mask {
// paths: "user.display_name"
// paths: "photo"
// }
//
// In JSON, the same mask is represented as below:
//
// {
// mask: "user.displayName,photo"
// }
//
// # Field Masks and Oneof Fields
//
// Field masks treat fields in oneofs just as regular fields. Consider the
// following message:
//
// message SampleMessage {
// oneof test_oneof {
// string name = 4;
// SubMessage sub_message = 9;
// }
// }
//
// The field mask can be:
//
// mask {
// paths: "name"
// }
//
// Or:
//
// mask {
// paths: "sub_message"
// }
//
// Note that oneof type names ("test_oneof" in this case) cannot be used in
// paths.
//
// ## Field Mask Verification
//
// The implementation of all API methods that have a FieldMask-typed field
// in the request should verify the included field paths, and return an
// `INVALID_ARGUMENT` error if any path is duplicated or unmappable.
message FieldMask {
// The set of field mask paths.
repeated string paths = 1;
}
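
As a concrete illustration of the JSON encoding described above (comma-separated paths, field names in lower camel case), here is a small Go sketch. It assumes plain string paths instead of a generated FieldMask type, and both helper names are hypothetical:

```go
package main

import (
	"fmt"
	"strings"
)

// toLowerCamel converts a snake_case field name such as "display_name" to
// "displayName", as the JSON encoding of field masks requires.
func toLowerCamel(s string) string {
	parts := strings.Split(s, "_")
	for i := 1; i < len(parts); i++ {
		if parts[i] != "" {
			parts[i] = strings.ToUpper(parts[i][:1]) + parts[i][1:]
		}
	}
	return strings.Join(parts, "")
}

// encodeFieldMask renders a list of paths as the single comma-separated
// string used by the JSON mapping, converting each path segment to
// lower camel case.
func encodeFieldMask(paths []string) string {
	out := make([]string, 0, len(paths))
	for _, p := range paths {
		segs := strings.Split(p, ".")
		for i, seg := range segs {
			segs[i] = toLowerCamel(seg)
		}
		out = append(out, strings.Join(segs, "."))
	}
	return strings.Join(out, ",")
}

func main() {
	fmt.Println(encodeFieldMask([]string{"user.display_name", "photo"}))
	// Output: user.displayName,photo
}
```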

View File

@ -0,0 +1,48 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option java_package = "com.google.protobuf";
option java_outer_classname = "SourceContextProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
option go_package = "types";
// `SourceContext` represents information about the source of a
// protobuf element, like the file in which it is defined.
message SourceContext {
// The path-qualified name of the .proto file that contained the associated
// protobuf element. For example: `"google/protobuf/source_context.proto"`.
string file_name = 1;
}

View File

@ -0,0 +1,96 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option cc_enable_arenas = true;
option go_package = "types";
option java_package = "com.google.protobuf";
option java_outer_classname = "StructProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
// `Struct` represents a structured data value, consisting of fields
// which map to dynamically typed values. In some languages, `Struct`
// might be supported by a native representation. For example, in
// scripting languages like JS a struct is represented as an
// object. The details of that representation are described together
// with the proto support for the language.
//
// The JSON representation for `Struct` is JSON object.
message Struct {
// Unordered map of dynamically typed values.
map<string, Value> fields = 1;
}
// `Value` represents a dynamically typed value which can be either
// null, a number, a string, a boolean, a recursive struct value, or a
// list of values. A producer of a value is expected to set one of these
// variants; absence of any variant indicates an error.
//
// The JSON representation for `Value` is a JSON value.
message Value {
// The kind of value.
oneof kind {
// Represents a null value.
NullValue null_value = 1;
// Represents a double value.
double number_value = 2;
// Represents a string value.
string string_value = 3;
// Represents a boolean value.
bool bool_value = 4;
// Represents a structured value.
Struct struct_value = 5;
// Represents a repeated `Value`.
ListValue list_value = 6;
}
}
// `NullValue` is a singleton enumeration to represent the null value for the
// `Value` type union.
//
// The JSON representation for `NullValue` is JSON `null`.
enum NullValue {
// Null value.
NULL_VALUE = 0;
}
// `ListValue` is a wrapper around a repeated field of values.
//
// The JSON representation for `ListValue` is JSON array.
message ListValue {
// Repeated field of dynamically typed values.
repeated Value values = 1;
}
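
The sketch below shows how the `Struct`/`Value`/`ListValue` trio composes in Go. It assumes the gogo-generated well-known types package (e.g. `github.com/gogo/protobuf/types`); the import path is an assumption, and if this repository generates its own package for these protos only that path changes:

```go
package main

import (
	"fmt"

	"github.com/gogo/protobuf/types" // assumed gogo well-known types package
)

func main() {
	// A Struct holding {"name": "tinykv", "replicas": 3, "tags": ["kv", "raft"]}.
	// Each dynamically typed value is wrapped in a Value with the matching
	// oneof kind set.
	s := &types.Struct{
		Fields: map[string]*types.Value{
			"name":     {Kind: &types.Value_StringValue{StringValue: "tinykv"}},
			"replicas": {Kind: &types.Value_NumberValue{NumberValue: 3}},
			"tags": {Kind: &types.Value_ListValue{ListValue: &types.ListValue{
				Values: []*types.Value{
					{Kind: &types.Value_StringValue{StringValue: "kv"}},
					{Kind: &types.Value_StringValue{StringValue: "raft"}},
				},
			}}},
		},
	}
	fmt.Println(len(s.Fields)) // 3
}
```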

View File

@ -0,0 +1,135 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option cc_enable_arenas = true;
option go_package = "types";
option java_package = "com.google.protobuf";
option java_outer_classname = "TimestampProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
// A Timestamp represents a point in time independent of any time zone
// or calendar, expressed as seconds and fractions of seconds at
// nanosecond resolution in UTC Epoch time. It is encoded using the
// Proleptic Gregorian Calendar which extends the Gregorian calendar
// backwards to year one. It is encoded assuming all minutes are 60
// seconds long, i.e. leap seconds are "smeared" so that no leap second
// table is needed for interpretation. Range is from
// 0001-01-01T00:00:00Z to 9999-12-31T23:59:59.999999999Z.
// By restricting to that range, we ensure that we can convert to
// and from RFC 3339 date strings.
// See [https://www.ietf.org/rfc/rfc3339.txt](https://www.ietf.org/rfc/rfc3339.txt).
//
// # Examples
//
// Example 1: Compute Timestamp from POSIX `time()`.
//
// Timestamp timestamp;
// timestamp.set_seconds(time(NULL));
// timestamp.set_nanos(0);
//
// Example 2: Compute Timestamp from POSIX `gettimeofday()`.
//
// struct timeval tv;
// gettimeofday(&tv, NULL);
//
// Timestamp timestamp;
// timestamp.set_seconds(tv.tv_sec);
// timestamp.set_nanos(tv.tv_usec * 1000);
//
// Example 3: Compute Timestamp from Win32 `GetSystemTimeAsFileTime()`.
//
// FILETIME ft;
// GetSystemTimeAsFileTime(&ft);
// UINT64 ticks = (((UINT64)ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
//
// // A Windows tick is 100 nanoseconds. Windows epoch 1601-01-01T00:00:00Z
// // is 11644473600 seconds before Unix epoch 1970-01-01T00:00:00Z.
// Timestamp timestamp;
// timestamp.set_seconds((INT64) ((ticks / 10000000) - 11644473600LL));
// timestamp.set_nanos((INT32) ((ticks % 10000000) * 100));
//
// Example 4: Compute Timestamp from Java `System.currentTimeMillis()`.
//
// long millis = System.currentTimeMillis();
//
// Timestamp timestamp = Timestamp.newBuilder().setSeconds(millis / 1000)
// .setNanos((int) ((millis % 1000) * 1000000)).build();
//
//
// Example 5: Compute Timestamp from current time in Python.
//
// timestamp = Timestamp()
// timestamp.GetCurrentTime()
//
// # JSON Mapping
//
// In JSON format, the Timestamp type is encoded as a string in the
// [RFC 3339](https://www.ietf.org/rfc/rfc3339.txt) format. That is, the
// format is "{year}-{month}-{day}T{hour}:{min}:{sec}[.{frac_sec}]Z"
// where {year} is always expressed using four digits while {month}, {day},
// {hour}, {min}, and {sec} are zero-padded to two digits each. The fractional
// seconds, which can go up to 9 digits (i.e. up to 1 nanosecond resolution),
// are optional. The "Z" suffix indicates the timezone ("UTC"); the timezone
// is required. A proto3 JSON serializer should always use UTC (as indicated by
// "Z") when printing the Timestamp type and a proto3 JSON parser should be
// able to accept both UTC and other timezones (as indicated by an offset).
//
// For example, "2017-01-15T01:30:15.01Z" encodes 15.01 seconds past
// 01:30 UTC on January 15, 2017.
//
// In JavaScript, one can convert a Date object to this format using the
// standard [toISOString()](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/toISOString)
// method. In Python, a standard `datetime.datetime` object can be converted
// to this format using [`strftime`](https://docs.python.org/2/library/time.html#time.strftime)
// with the time format spec '%Y-%m-%dT%H:%M:%S.%fZ'. Likewise, in Java, one
// can use the Joda Time's [`ISODateTimeFormat.dateTime()`](
// http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateTime--
// ) to obtain a formatter capable of generating timestamps in this format.
//
//
message Timestamp {
// Represents seconds of UTC time since Unix epoch
// 1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to
// 9999-12-31T23:59:59Z inclusive.
int64 seconds = 1;
// Non-negative fractions of a second at nanosecond resolution. Negative
// second values with fractions must still have non-negative nanos values
// that count forward in time. Must be from 0 to 999,999,999
// inclusive.
int32 nanos = 2;
}
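
For a Go-flavored counterpart to Examples 1-5, the standard library alone is enough to produce the two Timestamp fields and the RFC 3339 string used by the JSON mapping. This is a sketch only; no generated Timestamp type is involved:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	now := time.Now()

	// The two Timestamp fields, derived from a Go time.Time: whole seconds
	// since the Unix epoch plus the non-negative nanosecond remainder.
	seconds := now.Unix()
	nanos := int32(now.Nanosecond()) // always in [0, 999999999]

	fmt.Println(seconds, nanos)

	// The JSON mapping is an RFC 3339 string in UTC with a "Z" suffix.
	fmt.Println(now.UTC().Format(time.RFC3339Nano))
}
```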

View File

@ -0,0 +1,187 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package google.protobuf;
import "google/protobuf/any.proto";
import "google/protobuf/source_context.proto";
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option cc_enable_arenas = true;
option java_package = "com.google.protobuf";
option java_outer_classname = "TypeProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
option go_package = "types";
// A protocol buffer message type.
message Type {
// The fully qualified message name.
string name = 1;
// The list of fields.
repeated Field fields = 2;
// The list of types appearing in `oneof` definitions in this type.
repeated string oneofs = 3;
// The protocol buffer options.
repeated Option options = 4;
// The source context.
SourceContext source_context = 5;
// The source syntax.
Syntax syntax = 6;
}
// A single field of a message type.
message Field {
// Basic field types.
enum Kind {
// Field type unknown.
TYPE_UNKNOWN = 0;
// Field type double.
TYPE_DOUBLE = 1;
// Field type float.
TYPE_FLOAT = 2;
// Field type int64.
TYPE_INT64 = 3;
// Field type uint64.
TYPE_UINT64 = 4;
// Field type int32.
TYPE_INT32 = 5;
// Field type fixed64.
TYPE_FIXED64 = 6;
// Field type fixed32.
TYPE_FIXED32 = 7;
// Field type bool.
TYPE_BOOL = 8;
// Field type string.
TYPE_STRING = 9;
// Field type group. Proto2 syntax only, and deprecated.
TYPE_GROUP = 10;
// Field type message.
TYPE_MESSAGE = 11;
// Field type bytes.
TYPE_BYTES = 12;
// Field type uint32.
TYPE_UINT32 = 13;
// Field type enum.
TYPE_ENUM = 14;
// Field type sfixed32.
TYPE_SFIXED32 = 15;
// Field type sfixed64.
TYPE_SFIXED64 = 16;
// Field type sint32.
TYPE_SINT32 = 17;
// Field type sint64.
TYPE_SINT64 = 18;
};
// Whether a field is optional, required, or repeated.
enum Cardinality {
// For fields with unknown cardinality.
CARDINALITY_UNKNOWN = 0;
// For optional fields.
CARDINALITY_OPTIONAL = 1;
// For required fields. Proto2 syntax only.
CARDINALITY_REQUIRED = 2;
// For repeated fields.
CARDINALITY_REPEATED = 3;
};
// The field type.
Kind kind = 1;
// The field cardinality.
Cardinality cardinality = 2;
// The field number.
int32 number = 3;
// The field name.
string name = 4;
// The field type URL, without the scheme, for message or enumeration
// types. Example: `"type.googleapis.com/google.protobuf.Timestamp"`.
string type_url = 6;
// The index of the field type in `Type.oneofs`, for message or enumeration
// types. The first type has index 1; zero means the type is not in the list.
int32 oneof_index = 7;
// Whether to use alternative packed wire representation.
bool packed = 8;
// The protocol buffer options.
repeated Option options = 9;
// The field JSON name.
string json_name = 10;
// The string value of the default value of this field. Proto2 syntax only.
string default_value = 11;
}
// Enum type definition.
message Enum {
// Enum type name.
string name = 1;
// Enum value definitions.
repeated EnumValue enumvalue = 2;
// Protocol buffer options.
repeated Option options = 3;
// The source context.
SourceContext source_context = 4;
// The source syntax.
Syntax syntax = 5;
}
// Enum value definition.
message EnumValue {
// Enum value name.
string name = 1;
// Enum value number.
int32 number = 2;
// Protocol buffer options.
repeated Option options = 3;
}
// A protocol buffer option, which can be attached to a message, field,
// enumeration, etc.
message Option {
// The option's name. For protobuf built-in options (options defined in
// descriptor.proto), this is the short name. For example, `"map_entry"`.
// For custom options, it should be the fully-qualified name. For example,
// `"google.api.http"`.
string name = 1;
// The option's value packed in an Any message. If the value is a primitive,
// the corresponding wrapper type defined in google/protobuf/wrappers.proto
// should be used. If the value is an enum, it should be stored as an int32
// value using the google.protobuf.Int32Value type.
Any value = 2;
}
// The syntax in which a protocol buffer element is defined.
enum Syntax {
// Syntax `proto2`.
SYNTAX_PROTO2 = 0;
// Syntax `proto3`.
SYNTAX_PROTO3 = 1;
}
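
A brief sketch of how these reflection messages are populated in Go, assuming standard protobuf Go code-generation naming (e.g. `Field_TYPE_INT64`, `Field_CARDINALITY_OPTIONAL`) and the gogo well-known types import path; both are assumptions rather than guarantees about this repository. It simply describes Timestamp's `seconds` field by hand:

```go
package main

import (
	"fmt"

	"github.com/gogo/protobuf/types" // assumed gogo well-known types package
)

func main() {
	// Hand-built descriptor for Timestamp's `int64 seconds = 1;` field,
	// using the Kind and Cardinality enums defined above.
	f := &types.Field{
		Kind:        types.Field_TYPE_INT64,
		Cardinality: types.Field_CARDINALITY_OPTIONAL,
		Number:      1,
		Name:        "seconds",
		JsonName:    "seconds",
	}
	fmt.Println(f.Name, f.Kind, f.Number)
}
```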

View File

@ -0,0 +1,118 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Wrappers for primitive (non-message) types. These types are useful
// for embedding primitives in the `google.protobuf.Any` type and for places
// where we need to distinguish between the absence of a primitive
// typed field and its default value.
syntax = "proto3";
package google.protobuf;
option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option cc_enable_arenas = true;
option go_package = "types";
option java_package = "com.google.protobuf";
option java_outer_classname = "WrappersProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";
// Wrapper message for `double`.
//
// The JSON representation for `DoubleValue` is JSON number.
message DoubleValue {
// The double value.
double value = 1;
}
// Wrapper message for `float`.
//
// The JSON representation for `FloatValue` is JSON number.
message FloatValue {
// The float value.
float value = 1;
}
// Wrapper message for `int64`.
//
// The JSON representation for `Int64Value` is JSON string.
message Int64Value {
// The int64 value.
int64 value = 1;
}
// Wrapper message for `uint64`.
//
// The JSON representation for `UInt64Value` is JSON string.
message UInt64Value {
// The uint64 value.
uint64 value = 1;
}
// Wrapper message for `int32`.
//
// The JSON representation for `Int32Value` is JSON number.
message Int32Value {
// The int32 value.
int32 value = 1;
}
// Wrapper message for `uint32`.
//
// The JSON representation for `UInt32Value` is JSON number.
message UInt32Value {
// The uint32 value.
uint32 value = 1;
}
// Wrapper message for `bool`.
//
// The JSON representation for `BoolValue` is JSON `true` and `false`.
message BoolValue {
// The bool value.
bool value = 1;
}
// Wrapper message for `string`.
//
// The JSON representation for `StringValue` is JSON string.
message StringValue {
// The string value.
string value = 1;
}
// Wrapper message for `bytes`.
//
// The JSON representation for `BytesValue` is JSON string.
message BytesValue {
// The bytes value.
bytes value = 1;
}
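
The point of these wrappers, distinguishing an absent field from the zero value, is easiest to see with pointer semantics. The following Go sketch uses a hand-written stand-in for `Int32Value` and a hypothetical `UpdateRequest`, so nothing here comes from generated code:

```go
package main

import "fmt"

// In proto3, a plain int32 field cannot distinguish "unset" from 0, while a
// wrapper message field is generated as a pointer, so nil means "absent".
// Int32Value below is a hand-written stand-in that mirrors that idea.
type Int32Value struct {
	Value int32
}

type UpdateRequest struct {
	// nil: leave the replica count unchanged; &Int32Value{0}: set it to zero.
	Replicas *Int32Value
}

func apply(req UpdateRequest, current int32) int32 {
	if req.Replicas == nil {
		return current // field absent: keep the existing value
	}
	return req.Replicas.Value // field present: use it, even if it is 0
}

func main() {
	fmt.Println(apply(UpdateRequest{}, 3))                                // 3
	fmt.Println(apply(UpdateRequest{Replicas: &Int32Value{Value: 0}}, 3)) // 0
}
```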

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff