Database open time is really high

FYI, there’s a seek missing in readIndex in your latest commit; without it, flushing the memtable fails.

Here’s a repro:

package main

import (
        "fmt"
        "os"
        "os/signal"
        "strings"
        "syscall"

        "github.com/dgraph-io/badger"
        "github.com/sirupsen/logrus"
)

// main reproduces the memtable flush failure: it queues 100000 sets of a
// 10000-byte value in a write batch, waits for SIGTERM or SIGINT, then flushes
// the batch and closes the database.
func main() {
        opts := badger.DefaultOptions
        opts.Dir = "/home/vincent/tmp/badger-bug-data"
        opts.ValueDir = opts.Dir

        db, err := badger.Open(opts)
        if err != nil {
                logrus.Fatal(err)
        }

        wb := db.NewWriteBatch()

        // The value is the same for every key, so build the string once.
        value := strings.Repeat("a", 10000)

        for i := 0; i < 100000; i++ {
                // Convert per iteration so every Set call gets its own byte slice.
                err := wb.Set([]byte(fmt.Sprintf("%d", i)), []byte(value), 0)
                if err != nil {
                        logrus.Fatal(err)
                }
        }

        // signal.Notify does not block when delivering, so the channel needs a
        // buffer or a signal that arrives before the receive below is dropped.
        // Register synchronously so the handler is installed before waiting.
        signalCh := make(chan os.Signal, 1)
        signal.Notify(signalCh, syscall.SIGTERM, syscall.SIGINT)
        <-signalCh

        err = wb.Flush()
        if err != nil {
                logrus.Fatal(err)
        }
        wb.Cancel()

        err = db.Close()
        if err != nil {
                logrus.Fatal(err)
        }
}

It failed like this indefinitely:

badger 2019/01/11 14:50:14 INFO: Storing value log head: {Fid:2 Len:42 Offset:1003488890}
badger 2019/01/11 14:50:14 ERROR: Failure while flushing memtable to disk: : : EOF. Retrying...
badger 2019/01/11 14:50:15 INFO: Storing value log head: {Fid:2 Len:42 Offset:1003488890}
badger 2019/01/11 14:50:15 ERROR: Failure while flushing memtable to disk: : : EOF. Retrying...
badger 2019/01/11 14:50:16 INFO: Storing value log head: {Fid:2 Len:42 Offset:1003488890}
badger 2019/01/11 14:50:17 ERROR: Failure while flushing memtable to disk: : : EOF. Retrying...
badger 2019/01/11 14:50:18 INFO: Storing value log head: {Fid:2 Len:42 Offset:1003488890}
badger 2019/01/11 14:50:18 ERROR: Failure while flushing memtable to disk: : : EOF. Retrying...
badger 2019/01/11 14:50:19 INFO: Storing value log head: {Fid:2 Len:42 Offset:1003488890}

I fixed it with this:

index 8978c98..2fcab39 100644
--- a/table/table.go
+++ b/table/table.go
@@ -242,6 +242,11 @@ func (t *Table) readIndex() error {
                t.blockIndex = append(t.blockIndex, ko)
        }

+       _, err := t.fd.Seek(0, os.SEEK_SET)
+       if err != nil {
+               return err
+       }
+
        // Execute this index read serially, because all disks are orders of magnitude faster when read
        // serially compared to executing random reads.
        var h header

which seems to work.

I’m currently in the process of recreating a database from scratch with a big dataset; we’ll see how it goes.