Skip to content

Commit 21555e8

Browse files
committed
Initial commit
1 parent 49af089 commit 21555e8

8 files changed

+139
-84
lines changed

charts/model-engine/templates/_helpers.tpl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,13 @@ env:
345345
- name: REDIS_AUTH_TOKEN
346346
value: {{ .Values.redis.auth }}
347347
{{- end }}
348+
{{- if .Values.redis.authSecret }}
349+
- name: REDIS_AUTH_TOKEN
350+
valueFrom:
351+
secretKeyRef:
352+
name: {{ .Values.redis.authSecret }}
353+
key: auth_token
354+
{{- end }}
348355
{{- if .Values.azure}}
349356
- name: AZURE_IDENTITY_NAME
350357
value: {{ .Values.azure.identity_name }}

charts/model-engine/templates/aws_config_map.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ data:
2020
[profile {{ $profileName }}]
2121
role_arn = {{ index $annotations "eks.amazonaws.com/role-arn" }}
2222
web_identity_token_file = /var/run/secrets/eks.amazonaws.com/serviceaccount/token
23+
[profile {{ $.Values.serviceAccount.sqsProfileName }}]
24+
role_arn = {{ index $annotations "eks.amazonaws.com/role-arn" }}
25+
web_identity_token_file = /var/run/secrets/eks.amazonaws.com/serviceaccount/token
2326
---
2427
{{- end }}
2528
{{- end }}

charts/model-engine/templates/inference_framework_config.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,18 @@ apiVersion: v1
22
kind: ConfigMap
33
metadata:
44
name: {{ include "modelEngine.fullname" . }}-inference-framework-latest-config
5+
namespace: {{ .Release.Namespace }}
56
labels:
67
product: common
78
team: infra
89
annotations:
9-
"helm.sh/hook": pre-install
10+
"helm.sh/hook": pre-install,pre-upgrade
1011
"helm.sh/hook-weight": "-2"
1112
data:
1213
deepspeed: "latest"
1314
text_generation_inference: "latest"
14-
vllm: "latest"
15-
vllm_batch: "latest"
16-
vllm_batch_v2: "latest"
15+
vllm: "{{ .Values.vllmTag }}"
16+
vllm_batch: "{{ .Values.vllmTag }}"
17+
vllm_batch_v2: "{{ .Values.vllmTag }}"
1718
lightllm: "latest"
1819
tensorrt_llm: "latest"

charts/model-engine/templates/istio-virtualservice.yaml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,9 @@ metadata:
1212
{{- end }}
1313
spec:
1414
hosts:
15-
{{- range .Values.virtualservice.hostDomains }}
16-
- "{{ $fullName }}.{{ . }}"
17-
{{- end }}
15+
- model-engine.{{ $.Values.global.networking.internalDomain }}
1816
gateways:
19-
{{- range .Values.virtualservice.gateways }}
20-
- {{ . | quote }}
21-
{{- end }}
17+
- {{ $.Values.global.networking.internalGateway }}
2218
http:
2319
- route:
2420
- destination:

charts/model-engine/templates/service_account_inference.yaml

Lines changed: 0 additions & 25 deletions
This file was deleted.

charts/model-engine/templates/service_config_map.yaml

Lines changed: 102 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ apiVersion: v1
33
kind: ConfigMap
44
metadata:
55
name: {{ include "modelEngine.fullname" . }}-service-config
6+
namespace: {{ .Release.Namespace }}
67
labels:
78
{{- include "modelEngine.labels" . | nindent 4 }}
89
annotations:
@@ -11,46 +12,110 @@ metadata:
1112
data:
1213
launch_service_config: |-
1314
dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
15+
16+
# Config to know where model-engine is running
1417
gateway_namespace: {{ .Release.Namespace | quote }}
15-
{{- with .Values.config.values.launch }}
16-
{{- range $key, $value := . }}
17-
{{ $key }}: {{ $value | quote }}
18-
{{- end }}
19-
{{- end }}
20-
infra_service_config: |-
21-
env: {{ .Values.context | quote }}
22-
{{- with .Values.config.values.infra }}
23-
{{- range $key, $value := . }}
24-
{{ $key }}: {{ $value | quote }}
25-
{{- end }}
26-
{{- end }}
2718
28-
---
19+
# Config for scale-hosted Hosted Model Inference in the prod cluster, plus a bunch of other config-ish notes
20+
# NOTE: If you add/change values inside this file that need to apply to all clusters, please make changes in
21+
# all service_config_{env}.yaml files as well.
2922
30-
apiVersion: v1
31-
kind: ConfigMap
32-
metadata:
33-
name: {{ include "modelEngine.fullname" . }}-service-config
34-
namespace: {{ .Values.config.values.launch.endpoint_namespace }}
35-
labels:
36-
{{- include "modelEngine.labels" . | nindent 4 }}
37-
annotations:
38-
"helm.sh/hook": pre-install,pre-upgrade
39-
"helm.sh/hook-weight": "-2"
40-
data:
41-
launch_service_config: |-
42-
dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
43-
gateway_namespace: {{ .Release.Namespace | quote }}
44-
{{- with .Values.config.values.launch }}
45-
{{- range $key, $value := . }}
46-
{{ $key }}: {{ $value | quote }}
47-
{{- end }}
48-
{{- end }}
23+
# Config for scale-hosted Hosted Model Inference in the prod cluster; see `service_config` for more details.
24+
model_primitive_host: model-server.{{ .Release.Namespace }}.svc.cluster.local
25+
26+
# Endpoint config
27+
# K8s namespace the endpoints will be created in
28+
endpoint_namespace: {{ .Release.Namespace | quote }}
29+
30+
# Asynchronous endpoints
31+
sqs_profile: {{ $.Values.serviceAccount.sqsProfileName }}
32+
sqs_queue_policy_template: |-
33+
{
34+
"Version": "2012-10-17",
35+
"Id": "__default_policy_ID",
36+
"Statement": [
37+
{
38+
"Sid": "__owner_statement",
39+
"Effect": "Allow",
40+
"Principal": {
41+
"AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:root"
42+
},
43+
"Action": "sqs:*",
44+
"Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
45+
},
46+
{
47+
"Effect": "Allow",
48+
"Principal": {
49+
"AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/{{ $.Values.serviceAccount.sqsProfileName }}"
50+
},
51+
"Action": "sqs:*",
52+
"Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
53+
},
54+
{
55+
"Effect": "Allow",
56+
"Principal": {
57+
"AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/ml_hosted_model_inference"
58+
},
59+
"Action": "sqs:*",
60+
"Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
61+
}
62+
]
63+
}
64+
65+
sqs_queue_tag_template: |-
66+
{
67+
"infra.scale.com/product": "{{ .Values.productTag }}",
68+
"infra.scale.com/team": "${team}",
69+
"infra.scale.com/contact": "{{ .Values.contactEmail }}",
70+
"infra.scale.com/customer": "AllCustomers",
71+
"infra.scale.com/financialOwner": "{{ .Values.contactEmail}}",
72+
"Launch-Endpoint-Id": "${endpoint_id}",
73+
"Launch-Endpoint-Name": "${endpoint_name}",
74+
"Launch-Endpoint-Created-By": "${endpoint_created_by}"
75+
}
76+
77+
# Billing
78+
billing_queue_arn: arn:aws:events:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:event-bus/money
79+
80+
# A plain redis URL would not work here because auth is required, so cache_url has to be pulled from the secret named by cache_redis_aws_secret_name
81+
cache_redis_aws_secret_name: "{{ .Values.secrets.redisAwsSecretName }}"
82+
83+
cloud_file_llm_fine_tune_repository: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/llm-ft-job-repository/prod"
84+
85+
dd_trace_enabled: true
86+
istio_enabled: true
87+
sensitive_log_mode: true
88+
tgi_repository: "text-generation-inference"
89+
vllm_repository: "vllm"
90+
lightllm_repository: "lightllm"
91+
tensorrt_llm_repository: "tensorrt-llm"
92+
batch_inference_vllm_repository: "llm-engine/batch-infer-vllm"
93+
user_inference_base_repository: "launch/inference"
94+
user_inference_pytorch_repository: "hosted-model-inference/async-pytorch"
95+
user_inference_tensorflow_repository: "hosted-model-inference/async-tensorflow-cpu"
96+
docker_image_layer_cache_repository: "kaniko-cache"
97+
98+
# S3 access
99+
hf_user_fine_tuned_weights_prefix: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/fine_tuned_weights"
49100
infra_service_config: |-
50101
env: {{ .Values.context | quote }}
51-
{{- with .Values.config.values.infra }}
52-
{{- range $key, $value := . }}
53-
{{ $key }}: {{ $value | quote }}
54-
{{- end }}
55-
{{- end }}
102+
cloud_provider: "aws"
103+
env: "prod"
104+
k8s_cluster_name: "usgw1-prod"
105+
dns_host_domain: "model-engine.ml-serving.{{ $.Values.global.networking.internalDomain }}"
106+
default_region: "{{ .Values.aws.region }}"
107+
ml_account_id: "{{ .Values.aws.accountId }}"
108+
docker_repo_prefix: "{{ .Values.aws.accountId }}.dkr.ecr.{{ .Values.aws.region }}.amazonaws.com"
109+
redis_host: "{{ .Values.redis.hostname }}"
110+
s3_bucket: "{{ .Values.aws.s3Bucket }}"
111+
profile_ml_worker: "ml-worker"
112+
profile_ml_inference_worker: "ml-worker"
113+
identity_service_url: "{{ .Values.identityServiceUrl }}"
114+
firehose_role_arn: "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/firehose-stream-logging-role"
115+
firehose_stream_name: "{{ .Values.firehoseStreamName }}"
116+
db_engine_pool_size: 20
117+
db_engine_max_overflow: 10
118+
db_engine_echo: false
119+
db_engine_echo_pool: true
120+
db_engine_disconnect_strategy: "pessimistic"
56121
{{- end }}

charts/model-engine/templates/service_template_config_map.yaml

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,17 @@ data:
9595
{{- toYaml . | nindent 12 }}
9696
{{- end }}
9797
{{- if eq $device "gpu" }}
98-
{{- if empty $node_selector }}
99-
nodeSelector:
100-
{{- end }}
101-
k8s.amazonaws.com/accelerator: ${GPU_TYPE}
98+
# {{- if empty $node_selector }}
99+
# nodeSelector:
100+
# {{- end }}
101+
# k8s.amazonaws.com/accelerator: ${GPU_TYPE}
102102
tolerations:
103103
- key: "nvidia.com/gpu"
104104
operator: "Exists"
105105
effect: "NoSchedule"
106+
- key: "gpu_a100_multi"
107+
operator: "Exists"
108+
effect: "NoSchedule"
106109
{{- end }}
107110
priorityClassName: ${PRIORITY}
108111
containers:
@@ -522,6 +525,7 @@ data:
522525
loadBalancer:
523526
simple: LEAST_REQUEST
524527
{{- end }}
528+
{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.autoscaling.vertical.enabled) }}
525529
vertical-pod-autoscaler.yaml: |-
526530
apiVersion: "autoscaling.k8s.io/v1"
527531
kind: VerticalPodAutoscaler
@@ -548,6 +552,7 @@ data:
548552
cpu: ${CPUS}
549553
memory: ${MEMORY}
550554
controlledResources: ["cpu", "memory"]
555+
{{- end }}
551556
pod-disruption-budget.yaml: |-
552557
apiVersion: policy/v1
553558
kind: PodDisruptionBudget
@@ -675,14 +680,17 @@ data:
675680
{{- toYaml . | nindent 12 }}
676681
{{- end }}
677682
{{- if eq $device "gpu" }}
678-
{{- if empty $node_selector }}
679-
nodeSelector:
680-
{{- end }}
681-
k8s.amazonaws.com/accelerator: ${GPU_TYPE}
683+
# {{- if empty $node_selector }}
684+
# nodeSelector:
685+
# {{- end }}
686+
# k8s.amazonaws.com/accelerator: ${GPU_TYPE}
682687
tolerations:
683688
- key: "nvidia.com/gpu"
684689
operator: "Exists"
685690
effect: "NoSchedule"
691+
- key: "gpu_a100_multi"
692+
operator: "Exists"
693+
effect: "NoSchedule"
686694
{{- end }}
687695
{{- if $service_template_service_account_name }}
688696
serviceAccountName: {{ $service_template_service_account_name }}

charts/model-engine/values.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ redis:
55
auth:
66
db:
77
runDbInitScript: false
8-
balloonNodeSelector:
9-
node-lifecycle: normal
10-
nodeSelector:
11-
node-lifecycle: normal
8+
# balloonNodeSelector:
9+
# node-lifecycle: normal
10+
# nodeSelector:
11+
# node-lifecycle: normal

0 commit comments

Comments
 (0)