Skip to content

Commit

Permalink
storage: Recover from corrupted indices for archived series
Browse files Browse the repository at this point in the history
An unopenable archived_fingerprint_to_timerange is simply deleted and
will be rebuilt during crash recovery (which can then take quite some time).

An unopenable archived_fingerprint_to_metric is not deleted but
instructions to the user are logged. A deletion has to be done by the
user explicitly as it means losing all archived series (and a repair
with a 3rd party tool might still be possible).
  • Loading branch information
beorn7 committed Apr 6, 2017
1 parent 9775ad4 commit 4fcc73a
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 11 deletions.
27 changes: 17 additions & 10 deletions storage/local/index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ import (
"github.com/prometheus/prometheus/storage/local/codable"
)

// Directory names for LevelDB indices.
const (
fingerprintToMetricDir = "archived_fingerprint_to_metric"
fingerprintTimeRangeDir = "archived_fingerprint_to_timerange"
labelNameToLabelValuesDir = "labelname_to_labelvalues"
labelPairToFingerprintsDir = "labelpair_to_fingerprints"
FingerprintToMetricDir = "archived_fingerprint_to_metric"
FingerprintTimeRangeDir = "archived_fingerprint_to_timerange"
LabelNameToLabelValuesDir = "labelname_to_labelvalues"
LabelPairToFingerprintsDir = "labelpair_to_fingerprints"
)

// LevelDB cache sizes, changeable via flags.
Expand Down Expand Up @@ -96,7 +97,7 @@ func (i *FingerprintMetricIndex) Lookup(fp model.Fingerprint) (metric model.Metr
// ready to use.
func NewFingerprintMetricIndex(basePath string) (*FingerprintMetricIndex, error) {
fingerprintToMetricDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, fingerprintToMetricDir),
Path: filepath.Join(basePath, FingerprintToMetricDir),
CacheSizeBytes: FingerprintMetricCacheSize,
})
if err != nil {
Expand Down Expand Up @@ -168,7 +169,7 @@ func (i *LabelNameLabelValuesIndex) LookupSet(l model.LabelName) (values map[mod
// LabelNameLabelValuesIndex ready to use.
func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex, error) {
labelNameToLabelValuesDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, labelNameToLabelValuesDir),
Path: filepath.Join(basePath, LabelNameToLabelValuesDir),
CacheSizeBytes: LabelNameLabelValuesCacheSize,
})
if err != nil {
Expand All @@ -182,7 +183,7 @@ func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex,
// DeleteLabelNameLabelValuesIndex deletes the LevelDB-backed
// LabelNameLabelValuesIndex. Use only for a not yet opened index.
func DeleteLabelNameLabelValuesIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, labelNameToLabelValuesDir))
return os.RemoveAll(path.Join(basePath, LabelNameToLabelValuesDir))
}

// LabelPairFingerprintsMapping is an in-memory map of label pairs to
Expand Down Expand Up @@ -246,7 +247,7 @@ func (i *LabelPairFingerprintIndex) LookupSet(p model.LabelPair) (fps map[model.
// LabelPairFingerprintIndex ready to use.
func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex, error) {
labelPairToFingerprintsDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, labelPairToFingerprintsDir),
Path: filepath.Join(basePath, LabelPairToFingerprintsDir),
CacheSizeBytes: LabelPairFingerprintsCacheSize,
})
if err != nil {
Expand All @@ -260,7 +261,7 @@ func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex,
// DeleteLabelPairFingerprintIndex deletes the LevelDB-backed
// LabelPairFingerprintIndex. Use only for a not yet opened index.
func DeleteLabelPairFingerprintIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, labelPairToFingerprintsDir))
return os.RemoveAll(path.Join(basePath, LabelPairToFingerprintsDir))
}

// FingerprintTimeRangeIndex models a database tracking the time ranges
Expand All @@ -284,7 +285,7 @@ func (i *FingerprintTimeRangeIndex) Lookup(fp model.Fingerprint) (firstTime, las
// FingerprintTimeRangeIndex ready to use.
func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex, error) {
fingerprintTimeRangeDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, fingerprintTimeRangeDir),
Path: filepath.Join(basePath, FingerprintTimeRangeDir),
CacheSizeBytes: FingerprintTimeRangeCacheSize,
})
if err != nil {
Expand All @@ -294,3 +295,9 @@ func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex,
KeyValueStore: fingerprintTimeRangeDB,
}, nil
}

// DeleteFingerprintTimeRangeIndex deletes the LevelDB-backed
// FingerprintTimeRangeIndex. Use only for a not yet opened index.
func DeleteFingerprintTimeRangeIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, FingerprintTimeRangeDir))
}
16 changes: 15 additions & 1 deletion storage/local/persistence.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,25 @@ func newPersistence(

archivedFingerprintToMetrics, err := index.NewFingerprintMetricIndex(basePath)
if err != nil {
// At this point, we could simply blow away the archived
// fingerprint-to-metric index. However, then we would lose
// _all_ archived metrics. So better give the user an
// opportunity to repair the LevelDB with a 3rd party tool.
log.Errorf("Could not open the fingerprint-to-metric index for archived series. Please try a 3rd party tool to repair LevelDB in directory %q. If unsuccessful or undesired, delete the whole directory and restart Prometheus for crash recovery. You will lose all archived time series.", filepath.Join(basePath, index.FingerprintToMetricDir))
return nil, err
}
archivedFingerprintToTimeRange, err := index.NewFingerprintTimeRangeIndex(basePath)
if err != nil {
return nil, err
// We can recover the archived fingerprint-to-timerange index,
// so blow it away and set ourselves dirty. Then re-open the now
// empty index.
if err := index.DeleteFingerprintTimeRangeIndex(basePath); err != nil {
return nil, err
}
dirty = true
if archivedFingerprintToTimeRange, err = index.NewFingerprintTimeRangeIndex(basePath); err != nil {
return nil, err
}
}

p := &persistence{
Expand Down

0 comments on commit 4fcc73a

Please sign in to comment.