Hey @jzhu077
The snapshot error was a bug, which I have fixed and pushed to the dgraph/dgraph:test image. The fix will soon also be available in the dgraph/dgraph:master image.
Here is a config that I tested. It runs 3 Dgraph Zeros and 15 Dgraph Servers with 5x replication, and the cluster is up in under 2 minutes. Give it a try. Also, in case you didn't know, you can check the groups and the nodes in each group by going to http://zero_ip:6080/state. It should show three groups with 5 nodes in each group.
# Public LoadBalancer Service for the Dgraph Servers: exposes the gRPC (9080)
# and HTTP (8080) ports of every pod labelled app=dgraph-server.
apiVersion: v1
kind: Service
metadata:
  name: dgraph-server-public
  labels:
    app: dgraph-server
spec:
  type: LoadBalancer
  ports:
    - port: 9080
      targetPort: 9080
      name: server-grpc
    - port: 8080
      targetPort: 8080
      name: server-http
  selector:
    app: dgraph-server
---
# Public LoadBalancer Service for the Dgraph Zeros: exposes the gRPC (5080)
# and HTTP (6080) ports of every pod labelled app=dgraph-zero.
apiVersion: v1
kind: Service
metadata:
  name: dgraph-zero-public
  labels:
    app: dgraph-zero
spec:
  type: LoadBalancer
  ports:
    - port: 5080
      targetPort: 5080
      name: zero-grpc
    - port: 6080
      targetPort: 6080
      name: zero-http
  selector:
    app: dgraph-zero
---
# Public LoadBalancer Service for Ratel: exposes HTTP port 8081 of the pods
# labelled app=dgraph-ratel.
apiVersion: v1
kind: Service
metadata:
  name: dgraph-ratel
  labels:
    app: dgraph-ratel
spec:
  type: LoadBalancer
  ports:
    - port: 8081
      targetPort: 8081
      name: ratel-http
  selector:
    app: dgraph-ratel
---
# This is a headless service (clusterIP: None), which is necessary for
# discovery of the pods in the dgraph-zero StatefulSet.
# https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#creating-a-statefulset
apiVersion: v1
kind: Service
metadata:
  name: dgraph-zero
  labels:
    app: dgraph-zero
spec:
  ports:
    - port: 5080
      targetPort: 5080
      name: zero-grpc
  clusterIP: None
  selector:
    app: dgraph-zero
---
# This is a headless service (clusterIP: None), which is necessary for
# discovery of the pods in the dgraph-server StatefulSet.
# https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#creating-a-statefulset
apiVersion: v1
kind: Service
metadata:
  name: dgraph-server
  labels:
    app: dgraph-server
spec:
  ports:
    - port: 7080
      targetPort: 7080
      name: grpc
  clusterIP: None
  selector:
    app: dgraph-server
---
# This StatefulSet runs 3 Dgraph Zeros.
# NOTE(review): apps/v1beta1 is a pre-GA API group; newer clusters need
# apps/v1 plus an explicit spec.selector. Confirm against the target cluster.
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: dgraph-zero
spec:
  serviceName: "dgraph-zero"
  replicas: 3
  template:
    metadata:
      labels:
        app: dgraph-zero
    spec:
      containers:
        - name: zero
          image: dgraph/dgraph:test
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 5080
              name: intra-node
          volumeMounts:
            - name: datadir
              mountPath: /dgraph
          # The start script takes the pod ordinal from the numeric hostname
          # suffix and passes ordinal + 1 as --idx. Pod 0 starts without
          # --peer; every other pod joins through dgraph-zero-0.
          command:
            - bash
            - "-c"
            - |
              set -ex
              [[ `hostname` =~ -([0-9]+)$ ]] || exit 1
              ordinal=${BASH_REMATCH[1]}
              idx=$(($ordinal + 1))
              if [[ $ordinal -eq 0 ]]; then
                dgraph zero -o -2000 --my=$(hostname -f):5080 --idx $idx --replicas 5
              else
                dgraph zero -o -2000 --my=$(hostname -f):5080 --peer dgraph-zero-0.dgraph-zero.default.svc.cluster.local:5080 --idx $idx --replicas 5
              fi
      terminationGracePeriodSeconds: 60
      volumes:
        - name: datadir
          persistentVolumeClaim:
            claimName: datadir
  updateStrategy:
    type: RollingUpdate
  # One 5Gi ReadWriteOnce claim named datadir is requested per pod.
  volumeClaimTemplates:
    - metadata:
        name: datadir
        annotations:
          volume.alpha.kubernetes.io/storage-class: anything
      spec:
        accessModes:
          - "ReadWriteOnce"
        resources:
          requests:
            storage: 5Gi
---
# This StatefulSet runs 15 replicas of Dgraph Server.
# NOTE(review): apps/v1beta1 is a pre-GA API group; newer clusters need
# apps/v1 plus an explicit spec.selector. Confirm against the target cluster.
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: dgraph-server
spec:
  serviceName: "dgraph-server"
  replicas: 15
  template:
    metadata:
      labels:
        app: dgraph-server
    spec:
      # Soft preference: avoid scheduling a server onto a node (hostname)
      # that already runs another app=dgraph-server pod.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - dgraph-server
                topologyKey: kubernetes.io/hostname
      containers:
        - name: server
          image: dgraph/dgraph:test
          imagePullPolicy: IfNotPresent
          # The original left `ports:` empty (null). Declare the ports the
          # Services in this file target: 7080 (headless, also used in
          # --my), 8080 (HTTP) and 9080 (gRPC).
          ports:
            - containerPort: 7080
              name: grpc-int
            - containerPort: 8080
              name: http
            - containerPort: 9080
              name: grpc
          volumeMounts:
            - name: datadir
              mountPath: /dgraph
          # Each server advertises its own FQDN on 7080 and connects to the
          # first Zero pod (dgraph-zero-0) on 5080.
          command:
            - bash
            - "-c"
            - |
              set -ex
              dgraph server --my=$(hostname -f):7080 --memory_mb 2048 --zero dgraph-zero-0.dgraph-zero.default.svc.cluster.local:5080
      terminationGracePeriodSeconds: 60
      volumes:
        - name: datadir
          persistentVolumeClaim:
            claimName: datadir
  updateStrategy:
    type: RollingUpdate
  # One 5Gi ReadWriteOnce claim named datadir is requested per pod.
  volumeClaimTemplates:
    - metadata:
        name: datadir
        annotations:
          volume.alpha.kubernetes.io/storage-class: anything
      spec:
        accessModes:
          - "ReadWriteOnce"
        resources:
          requests:
            storage: 5Gi
---
# Deployment running the Ratel UI (the dgraph-ratel binary from the dgraph
# image), listening on container port 8081.
# NOTE(review): apps/v1beta2 is a pre-GA API group; newer clusters need
# apps/v1. Confirm against the target cluster.
apiVersion: apps/v1beta2
kind: Deployment
metadata:
  name: dgraph-ratel
  labels:
    app: dgraph-ratel
spec:
  selector:
    matchLabels:
      app: dgraph-ratel
  template:
    metadata:
      labels:
        app: dgraph-ratel
    spec:
      containers:
        - name: ratel
          image: dgraph/dgraph:test
          ports:
            - containerPort: 8081
          command:
            - dgraph-ratel