倒排索引
prometheus tsdb中的index以倒排索引的方式组织:
- 给每个series分配1个id
- 用seriesId查询series,这是前向索引,查询时间复杂度=O(1);
- 构造label的索引
- 若seriesId={2,5,10,29}都含有label: app='nginx';
- 那么,对于app='nginx', {2,5,10,29}就是它的倒排索引;
// seriesId=5
{
__name__ = "request_total",
pod="nginx-1",
path="/api/v1/status",
status="200",
method="GET"
}
那么,对于:
- status="200": 它的倒排索引={1,2,5,......}
- method="GET": 它的倒排索引={2,3,4,5,6,9,......}
block中,使用blockIndexReader,读取block目录中的index文件,将其中的label组织成倒排索引;
headIndexReader和blockIndexReader均实现了IndexReader接口(Go中没有继承),提供了:
- LabelNames(): 查询所有的Label key;
- LabelValues(name):查询label key对应的values;
- Postings():查询label key/value对应的[]seriesId;
文章图片
内存中的倒排索引 数据结构:
// tsdb/index/postings.go
type MemPostings struct {
mtxsync.RWMutex// label key --> []labelValue
valuesmap[string]stringset // Label names to possible values.// map[labelName]map[labelValue]postingsList
// labelName --> labelValue --> []posting
mmap[string]map[string][]uint64
ordered bool
}// tsdb/head.go
// Head handles reads and writes of time series data within a time window.
type Head struct {
......
// postings is the in-memory inverted index of the head.
postings *index.MemPostings // Postings lists for terms.
}
1-内存倒排索引的插入
入口是插入时序数据:
- 如果lset已经在series中了,则直接返回;
- 否则获取一个seriesId:
- 将label key/value插入到h.values;
- 将label key/value和seriesId插入到h.postings中(大map);
// tsdb/head.go
// Add is the write entry point shown in this excerpt: it resolves (or
// creates) the in-memory series for lset via Head.getOrCreate.
// "......" marks code elided from the excerpt.
func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
	......
	s, created, err := a.head.getOrCreate(lset.Hash(), lset)
	......
}

// getOrCreate returns the series stored for (hash, lset). When none exists
// it reserves the next series ID and delegates creation to
// getOrCreateWithID. The bool result is false when the series already existed.
func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool, error) {
	if s := h.series.getByHash(hash, lset); s != nil {
		// Already present: return it unchanged.
		return s, false, nil
	}
	// Not present: atomically allocate a fresh series ID.
	id := atomic.AddUint64(&h.lastSeriesID, 1)
	return h.getOrCreateWithID(id, hash, lset)
}
插入到h.values和h.postings:
// tsdb/head.go
// getOrCreateWithID builds a memSeries for lset with the given id and records
// its labels in h.values and h.postings. "......" marks elided code.
func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool, error) {
s := newMemSeries(lset, id, h.chunkRange, &h.memChunkPool)
......
// Record every label name/value pair in h.values.
for _, l := range lset {
valset, ok := h.values[l.Name]
if !ok {
valset = stringset{}
h.values[l.Name] = valset
}
// Add this value to the value set of the label name.
valset.set(l.Value)
......
}
// id is the series ID.
// Register the series' label pairs together with its ID in h.postings.
h.postings.Add(id, lset)
return s, true, nil
}
2-内存倒排索引的查询
主要在headIndexReader中进行:
- 通过LabelNames()查询所有的labelName;
- 通过LabelValues(name)查询labelName对应的labelValues;
- 通过postings查询到key、value对应的[]seriesId,最终使用seriesId+chunkReader查询最终的时序数据(t/v);
// tsdb/head.go
// LabelNames returns every non-empty label name recorded in the head's
// values map, sorted in ascending order.
func (h *headIndexReader) LabelNames() ([]string, error) {
	names := make([]string, 0, len(h.head.values))
	for key := range h.head.values {
		// The empty name is skipped.
		if key != "" {
			names = append(names, key)
		}
	}
	sort.Strings(names)
	return names, nil
}
// tsdb/head.go
// LabelValues returns the values recorded in the head for the label name.
// The result follows map iteration order and is therefore not sorted.
func (h *headIndexReader) LabelValues(name string) ([]string, error) {
	known := h.head.values[name]
	out := make([]string, 0, len(known))
	for val := range known {
		out = append(out, val)
	}
	return out, nil
}
Postings()提供了查询key/values对应的[]seriesId的功能:
// tsdb/head.go
// Postings returns the postings list iterator for the label pairs.
func (h *headIndexReader) Postings(name string, values ...string) (index.Postings, error) {
	// One postings list per requested value, looked up in the head's index.
	lists := make([]index.Postings, len(values))
	for i := range values {
		lists[i] = h.head.postings.Get(name, values[i])
	}
	// Combine the per-value lists into a single iterator.
	return index.Merge(lists...), nil
}
block中的倒排索引 数据结构:
// tsdb/index/index.go
// Reader is the on-disk index reader excerpted here; "......" marks elided fields.
type Reader struct {
......
// postings maps a label name to its postingOffset entries
// (label value + offset).
postings map[string][]postingOffset
......
}
block中的倒排索引,是read磁盘block中的index文件得到。
查询LabelNames():
- 具体读取由Reader.LabelNames()实现;
- blockIndexReader.LabelNames()最终调用Reader.LabelNames();
// tsdb/index/index.go
// LabelNames returns all the unique label names present in the index.
func (r *Reader) LabelNames() ([]string, error) {
labelNames := make([]string, 0, len(r.postings))
// Collect the keys of r.postings, which are the label names.
for name := range r.postings {
......
labelNames = append(labelNames, name)
}
// Return the names in sorted order.
sort.Strings(labelNames)
return labelNames, nil
}// tsdb/block.go
// LabelNames forwards to the LabelNames method of the underlying block.
func (r blockIndexReader) LabelNames() ([]string, error) {
	names, err := r.b.LabelNames()
	return names, err
}
查询LabelValues(name):
- 具体读取由Reader.LabelValues读toc.PostingsTable实现;
- blockIndexReader.LabelValues()最终调用Reader.LabelValues();
// tsdb/index/index.go
// LabelValues returns value tuples that exist for the given label name.
func (r *Reader) LabelValues(name string) ([]string, error) {
......
e, ok := r.postings[name]
values := make([]string, 0, len(e)*symbolFactor)
// 读toc.PostingsTable
d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil)
d.Skip(e[0].off)
lastVal := e[len(e)-1].valuefor d.Err() == nil {
......
s := yoloString(d.UvarintBytes()) //Label value.
values = append(values, s)
}
return values, nil
}// tsdb/block.go
// LabelValues delegates to the block's index reader and annotates any error
// with the block's ULID.
func (r blockIndexReader) LabelValues(name string) ([]string, error) {
	values, err := r.ir.LabelValues(name)
	wrapped := errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
	return values, wrapped
}
查询Postings():
- 具体读取由Reader.Postings()读toc.PostingsTable实现;
- blockIndexReader.Postings()最终调用Reader.Postings();
// tsdb/index/index.go
// Postings returns the merged postings for the given label name and values,
// read via the postings offset table. "......" and "....." mark elided code.
func (r *Reader) Postings(name string, values ...string) (Postings, error) {
.....
e, ok := r.postings[name]
res := make([]Postings, 0, len(values))
for valueIndex < len(values) && values[valueIndex] < e[0].value {
// Discard values before the start.
valueIndex++
}
for valueIndex < len(values) {
value := values[valueIndex]
// Find the first entry whose value is >= the requested value.
i := sort.Search(len(e), func(i int) bool { return e[i].value >= value })
d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil)
d.Skip(e[i].off)
for d.Err() == nil {
......
// Decode the postings list found at postingsOff.
d2 := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable)
_, p, err := r.dec.Postings(d2.Get())
res = append(res, p)
}
}
return Merge(res...), nil
}// tsdb/block.go
// Postings delegates to the block's index reader; on failure the error is
// annotated with the block's ULID.
func (r blockIndexReader) Postings(name string, values ...string) (index.Postings, error) {
	postings, err := r.ir.Postings(name, values...)
	if err == nil {
		return postings, nil
	}
	return postings, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
}
Postings()在查询时的使用:内存和block使用Postings()进行查询的流程类似,只是使用不同的indexReader。
1) 查询入口:加载内存block和磁盘block,构造出blockQuerier
// tsdb/db.go
// Querier returns a querier spanning every block that can hold data in
// [mint, maxt]: the on-disk blocks overlapping the interval plus, when maxt
// reaches into it, the in-memory head wrapped in a RangeHead.
// It returns an error if a querier cannot be opened for one of the blocks.
func (db *DB) Querier(_ context.Context, mint, maxt int64) (storage.Querier, error) {
	var blocks []BlockReader

	// On-disk blocks.
	for _, b := range db.blocks {
		if b.OverlapsClosedInterval(mint, maxt) {
			blocks = append(blocks, b)
			// NOTE(review): blockMetas is not declared in this excerpt;
			// presumably it is declared in code the article omitted.
			blockMetas = append(blockMetas, b.Meta())
		}
	}

	// In-memory block.
	if maxt >= db.head.MinTime() {
		blocks = append(blocks, &RangeHead{
			head: db.head,
			mint: mint,
			maxt: maxt,
		})
	}

	blockQueriers := make([]storage.Querier, 0, len(blocks))
	for _, b := range blocks {
		q, err := NewBlockQuerier(b, mint, maxt)
		if err != nil {
			// Do not drop the failure silently: close the queriers opened
			// so far, then report the error.
			for _, opened := range blockQueriers {
				_ = opened.Close() // best effort; the open error is what the caller needs
			}
			return nil, errors.Wrap(err, "open querier for block")
		}
		blockQueriers = append(blockQueriers, q)
	}

	return &querier{
		blocks: blockQueriers,
	}, nil
}
可以看出:
- 对于内存block,使用RangeHead结构;
- 对于磁盘block,使用Block结构;
// tsdb/querier.go
// NewBlockQuerier returns a querier against the reader.
func NewBlockQuerier(b BlockReader, mint, maxt int64) (storage.Querier, error) {
// Obtain the index reader from the block; which concrete reader is returned
// depends on the BlockReader implementation behind b.
indexr, err := b.Index()
if err != nil {
return nil, errors.Wrapf(err, "open index reader")
}
chunkr, err := b.Chunks()
.....
return &blockQuerier{
mint:mint,
maxt:maxt,
index:indexr,
chunks:chunkr,
tombstones: tombsr,
}, nil
}
上述代码中,最重要的是:
// 根据不同的block构造出不同的indexReader
indexr, err := b.Index()
对于内存block(RangeHead): 最终构造的是headIndexReader
// tsdb/head.go
func (h *RangeHead) Index() (IndexReader, error) {
return h.head.indexRange(h.mint, h.maxt), nil
}func (h *Head) indexRange(mint, maxt int64) *headIndexReader {
if hmin := h.MinTime();
hmin > mint {
mint = hmin
}
return &headIndexReader{head: h, mint: mint, maxt: maxt}
}
对于磁盘block(Block): 最终构造的是blockIndexReader
// tsdb/block.go
// Index returns a new IndexReader against the block data.
func (pb *Block) Index() (IndexReader, error) {
	// startRead must succeed before a reader is handed out.
	err := pb.startRead()
	if err != nil {
		return nil, err
	}
	reader := blockIndexReader{ir: pb.indexr, b: pb}
	return reader, nil
}
3) BlockQuerier使用indexReader查询postings信息
查询seriesSet
// Select looks up the series matching ms through the querier's index reader,
// using the sorted lookup when sortSeries is set.
// "......" marks code elided from the excerpt.
func (q *blockQuerier) Select(sortSeries bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.SeriesSet {
	......
	if sortSeries {
		base, err = LookupChunkSeriesSorted(q.index, q.tombstones, ms...)
	} else {
		base, err = LookupChunkSeries(q.index, q.tombstones, ms...)
	}
	......
}

// lookupChunkSeries resolves the matchers to postings through the given
// index reader. "......" marks code elided from the excerpt.
func lookupChunkSeries(sorted bool, ir IndexReader, tr tombstones.Reader, ms ...*labels.Matcher) (storage.DeprecatedChunkSeriesSet, error) {
	......
	// According to the surrounding article, this ends up calling the index
	// reader's Postings().
	p, err := PostingsForMatchers(ir, ms...)
	......
}
PostingsForMatchers最终会调用到indexReader.Postings()。
参考 【prometheus源码分析(index倒排索引)】1.https://ganeshvernekar.com/bl...
推荐阅读
- prometheus源码分析(rules模块)
- prometheus源码分析(scrape模块)
- prometheus源码分析(t/v数据的压缩、写入和读取)
- prometheus源码剖析(head block)
- k8s|k8s hpa计算
- 我的大屏监控布局资料
- PromQL之label_replace/label_join