@@ -3,6 +3,7 @@ apiVersion: v1
3
3
kind : ConfigMap
4
4
metadata :
5
5
name : {{ include "modelEngine.fullname" . }}-service-config
6
+ namespace : {{ .Release.Namespace }}
6
7
labels :
7
8
{{- include "modelEngine.labels" . | nindent 4 }}
8
9
annotations :
@@ -11,46 +12,110 @@ metadata:
11
12
data :
12
13
launch_service_config : |-
13
14
dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
15
+
16
+ # Config to know where model-engine is running
14
17
gateway_namespace: {{ .Release.Namespace | quote }}
15
- {{- with .Values.config.values.launch }}
16
- {{- range $key, $value := . }}
17
- {{ $key }}: {{ $value | quote }}
18
- {{- end }}
19
- {{- end }}
20
- infra_service_config : |-
21
- env: {{ .Values.context | quote }}
22
- {{- with .Values.config.values.infra }}
23
- {{- range $key, $value := . }}
24
- {{ $key }}: {{ $value | quote }}
25
- {{- end }}
26
- {{- end }}
27
18
28
- ---
19
+ # Config for Scale-hosted Hosted Model Inference in the prod cluster, plus related configuration notes.
20
+ # NOTE: If you add/change values inside this file that need to apply to all clusters, please make changes in
21
+ # all service_config_{env}.yaml files as well.
29
22
30
- apiVersion : v1
31
- kind : ConfigMap
32
- metadata :
33
- name : {{ include "modelEngine.fullname" . }}-service-config
34
- namespace : {{ .Values.config.values.launch.endpoint_namespace }}
35
- labels :
36
- {{- include "modelEngine.labels" . | nindent 4 }}
37
- annotations :
38
- " helm.sh/hook " : pre-install,pre-upgrade
39
- " helm.sh/hook-weight " : " -2"
40
- data :
41
- launch_service_config : |-
42
- dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
43
- gateway_namespace: {{ .Release.Namespace | quote }}
44
- {{- with .Values.config.values.launch }}
45
- {{- range $key, $value := . }}
46
- {{ $key }}: {{ $value | quote }}
47
- {{- end }}
48
- {{- end }}
23
+ # Config for scale-hosted Hosted Model Inference in the prod cluster, see `service_config` for more details
24
+ model_primitive_host: model-server.{{ .Release.Namespace }}.svc.cluster.local
25
+
26
+ # # Endpoint config
27
+ # K8s namespace the endpoints will be created in
28
+ endpoint_namespace: {{ .Release.Namespace | quote }}
29
+
30
+ # Asynchronous endpoints
31
+ sqs_profile: {{ $.Values.serviceAccount.sqsProfileName | quote }}
32
+ sqs_queue_policy_template: |-
33
+ {
34
+ "Version": "2012-10-17",
35
+ "Id": "__default_policy_ID",
36
+ "Statement": [
37
+ {
38
+ "Sid": "__owner_statement",
39
+ "Effect": "Allow",
40
+ "Principal": {
41
+ "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:root"
42
+ },
43
+ "Action": "sqs:*",
44
+ "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
45
+ },
46
+ {
47
+ "Effect": "Allow",
48
+ "Principal": {
49
+ "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/{{ $.Values.serviceAccount.sqsProfileName }}"
50
+ },
51
+ "Action": "sqs:*",
52
+ "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
53
+ },
54
+ {
55
+ "Effect": "Allow",
56
+ "Principal": {
57
+ "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/ml_hosted_model_inference"
58
+ },
59
+ "Action": "sqs:*",
60
+ "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
61
+ }
62
+ ]
63
+ }
64
+
65
+ sqs_queue_tag_template: |-
66
+ {
67
+ "infra.scale.com/product": "{{ .Values.productTag }}",
68
+ "infra.scale.com/team": "${team}",
69
+ "infra.scale.com/contact": "{{ .Values.contactEmail }}",
70
+ "infra.scale.com/customer": "AllCustomers",
71
+ "infra.scale.com/financialOwner": "{{ .Values.contactEmail }}",
72
+ "Launch-Endpoint-Id": "${endpoint_id}",
73
+ "Launch-Endpoint-Name": "${endpoint_name}",
74
+ "Launch-Endpoint-Created-By": "${endpoint_created_by}"
75
+ }
76
+
77
+ # Billing
78
+ billing_queue_arn: "arn:{{ .Values.aws.partition }}:events:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:event-bus/money"
79
+
80
+ # A plain Redis URL cannot be used here because the cache requires auth, so cache_url is pulled from the AWS secret named by cache_redis_aws_secret_name.
81
+ cache_redis_aws_secret_name: "{{ .Values.secrets.redisAwsSecretName }}"
82
+
83
+ cloud_file_llm_fine_tune_repository: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/llm-ft-job-repository/prod"
84
+
85
+ # dd_trace_enabled is not repeated here: it is set once at the top of launch_service_config from .Values.dd_trace_enabled, and a second key would silently override it.
86
+ istio_enabled: true
87
+ sensitive_log_mode: true
88
+ tgi_repository: "text-generation-inference"
89
+ vllm_repository: "vllm"
90
+ lightllm_repository: "lightllm"
91
+ tensorrt_llm_repository: "tensorrt-llm"
92
+ batch_inference_vllm_repository: "llm-engine/batch-infer-vllm"
93
+ user_inference_base_repository: "launch/inference"
94
+ user_inference_pytorch_repository: "hosted-model-inference/async-pytorch"
95
+ user_inference_tensorflow_repository: "hosted-model-inference/async-tensorflow-cpu"
96
+ docker_image_layer_cache_repository: "kaniko-cache"
97
+
98
+ # S3 access
99
+ hf_user_fine_tuned_weights_prefix: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/fine_tuned_weights"
49
100
infra_service_config : |-
50
101
env: {{ .Values.context | quote }}
51
- {{- with .Values.config.values.infra }}
52
- {{- range $key, $value := . }}
53
- {{ $key }}: {{ $value | quote }}
54
- {{- end }}
55
- {{- end }}
102
+ cloud_provider: "aws"
103
+ # env is not repeated here: it is set at the top of infra_service_config from .Values.context, and a second key would silently override it.
104
+ k8s_cluster_name: "usgw1-prod"
105
+ dns_host_domain: "model-engine.ml-serving.{{ $.Values.global.networking.internalDomain }}"
106
+ default_region: "{{ .Values.aws.region }}"
107
+ ml_account_id: "{{ .Values.aws.accountId }}"
108
+ docker_repo_prefix: "{{ .Values.aws.accountId }}.dkr.ecr.{{ .Values.aws.region }}.amazonaws.com"
109
+ redis_host: "{{ .Values.redis.hostname }}"
110
+ s3_bucket: "{{ .Values.aws.s3Bucket }}"
111
+ profile_ml_worker: "ml-worker"
112
+ profile_ml_inference_worker: "ml-worker"
113
+ identity_service_url: "{{ .Values.identityServiceUrl }}"
114
+ firehose_role_arn: "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/firehose-stream-logging-role"
115
+ firehose_stream_name: "{{ .Values.firehoseStreamName }}"
116
+ db_engine_pool_size: 20
117
+ db_engine_max_overflow: 10
118
+ db_engine_echo: false
119
+ db_engine_echo_pool: true
120
+ db_engine_disconnect_strategy: "pessimistic"
56
121
{{- end }}
0 commit comments