1+ import time
12import boto3
3+ from botocore .exceptions import ClientError
24
3- from arns import BEDROCK_KB_IAM_ROLE_ARN , REDSHIFT_WORKGROUP_ARN
5+ from arns import BEDROCK_KB_IAM_ROLE_ARN
6+ from get_redshift_wg_arn import REDSHIFT_WORKGROUP_ARN
47from logger import logger
58from vars import AWS_REGION , BEDROCK_KB , GLUE_DB , S3_FOLDER
69
7- GLUE_TABLE = S3_FOLDER
10+ GLUE_TABLE_FULL = f" { GLUE_DB } . { S3_FOLDER } "
811
912bedrock = boto3 .client ("bedrock-agent" , region_name = AWS_REGION )
1013
11- bedrock .create_knowledge_base (
12- name = BEDROCK_KB ,
13- roleArn = BEDROCK_KB_IAM_ROLE_ARN ,
14- knowledgeBaseConfiguration = {
15- "type" : "STRUCTURED" ,
16- "sqlKnowledgeBaseConfiguration" : {
17- "type" : "REDSHIFT" ,
18- "redshiftConfiguration" : {
19- "queryEngineConfiguration" : {
20- "type" : "SERVERLESS" ,
21- "serverlessConfiguration" : {
22- "workgroupArn" : REDSHIFT_WORKGROUP_ARN ,
23- "authConfiguration" : {
24- "type" : "IAM"
25- }
def _find_knowledge_base_id(name):
    """Return the ID of the knowledge base called *name*.

    Walks every page of ``list_knowledge_bases`` so the lookup does not
    silently miss a KB that falls outside the first page of results.

    Raises:
        RuntimeError: if no knowledge base with that name is listed.
    """
    paginator = bedrock.get_paginator("list_knowledge_bases")
    for page in paginator.paginate():
        for summary in page["knowledgeBaseSummaries"]:
            if summary["name"] == name:
                return summary["knowledgeBaseId"]
    raise RuntimeError(
        f"Knowledge base {name!r} was reported as existing but was not found "
        "in list_knowledge_bases"
    )


def _find_data_source_id(kb_id, name):
    """Return the ID of the data source called *name* inside knowledge base *kb_id*.

    Matches on the name instead of taking the first listed entry, so an
    unrelated data source attached to the same KB is never picked up.

    Raises:
        RuntimeError: if no data source with that name is listed.
    """
    paginator = bedrock.get_paginator("list_data_sources")
    for page in paginator.paginate(knowledgeBaseId=kb_id):
        for summary in page["dataSourceSummaries"]:
            if summary["name"] == name:
                return summary["dataSourceId"]
    raise RuntimeError(
        f"Data source {name!r} was reported as existing but was not found "
        f"in knowledge base {kb_id}"
    )


def setup_structured_kb(poll_interval_seconds=10, timeout_seconds=None):
    """Create (or reuse) the SQL knowledge base, attach its data source, and sync it.

    Steps:
      1. Create the knowledge base named ``BEDROCK_KB``; on ``ConflictException``
         look up the existing one by name.
      2. Create the ``REDSHIFT_METADATA`` data source; on ``ConflictException``
         look up the existing one by name.
      3. Start an ingestion job and poll it until it reaches a terminal status.

    Args:
        poll_interval_seconds: Seconds to sleep between ingestion status checks.
        timeout_seconds: Maximum seconds to wait for the ingestion job.
            ``None`` (the default) waits indefinitely.

    Raises:
        ClientError: for any AWS error other than the handled conflicts.
        RuntimeError: if a conflicting resource cannot be found by name.
        TimeoutError: if ``timeout_seconds`` is set and elapses before the
            ingestion job reaches a terminal status.
    """
    logger.info(f"Creating Knowledge Base: {BEDROCK_KB} ...")
    try:
        kb_response = bedrock.create_knowledge_base(
            name=BEDROCK_KB,
            roleArn=BEDROCK_KB_IAM_ROLE_ARN,
            knowledgeBaseConfiguration={
                "type": "SQL",
                "sqlKnowledgeBaseConfiguration": {
                    "type": "REDSHIFT",
                    "redshiftConfiguration": {
                        "queryEngineConfiguration": {
                            "type": "SERVERLESS",
                            "serverlessConfiguration": {
                                "workgroupArn": REDSHIFT_WORKGROUP_ARN,
                                "authConfiguration": {"type": "IAM"},
                            },
                        },
                        "storageConfigurations": [
                            {
                                "type": "AWS_DATA_CATALOG",
                                "awsDataCatalogConfiguration": {
                                    "tableNames": [GLUE_TABLE_FULL],
                                },
                            },
                        ],
                    },
                },
            },
        )
    except ClientError as e:
        if e.response['Error']['Code'] != 'ConflictException':
            raise  # bare raise keeps the original traceback
        logger.info(f"KB {BEDROCK_KB} already exists. Fetching ID...")
        kb_id = _find_knowledge_base_id(BEDROCK_KB)
    else:
        kb_id = kb_response['knowledgeBase']['knowledgeBaseId']
        logger.info(f"Successfully created KB with ID: {kb_id} ")

    # No whitespace in the name: it is sent to the API as a resource name and
    # is also the key used to find the data source again on a re-run.
    ds_name = f"{BEDROCK_KB}-metadata-source"
    logger.info("Connecting Redshift Metadata Data Source...")
    try:
        ds_response = bedrock.create_data_source(
            knowledgeBaseId=kb_id,
            name=ds_name,
            dataSourceConfiguration={"type": "REDSHIFT_METADATA"},
        )
    except ClientError as e:
        if e.response['Error']['Code'] != 'ConflictException':
            raise
        logger.info("Data Source already exists. Fetching ID...")
        ds_id = _find_data_source_id(kb_id, ds_name)
    else:
        ds_id = ds_response['dataSource']['dataSourceId']
        logger.info(f"Data Source Created: {ds_id} ")

    # Trigger the metadata sync (ingestion).
    logger.info("Starting Metadata Ingestion Job (Sync)...")
    ingest_response = bedrock.start_ingestion_job(
        knowledgeBaseId=kb_id,
        dataSourceId=ds_id,
    )
    job_id = ingest_response['ingestionJob']['ingestionJobId']

    # Poll until the job reaches a terminal status (or the optional deadline).
    deadline = None
    if timeout_seconds is not None:
        deadline = time.monotonic() + timeout_seconds

    while True:
        job = bedrock.get_ingestion_job(
            knowledgeBaseId=kb_id,
            dataSourceId=ds_id,
            ingestionJobId=job_id,
        )['ingestionJob']
        status = job['status']
        logger.info(f"Sync Status: {status} ")

        if status in ('COMPLETE', 'FAILED', 'STOPPED'):
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Ingestion job {job_id} still {status} after {timeout_seconds}s"
            )
        time.sleep(poll_interval_seconds)

    # Only announce readiness when the sync actually succeeded.
    if status == 'FAILED':
        logger.error(f"Sync failed. Reasons: {job.get('failureReasons')} ")
        return
    if status != 'COMPLETE':
        logger.error(f"Sync ended with status {status}; Knowledge Base is not ready.")
        return

    logger.info("Knowledge Base is now fully ready for SQL queries.")
40101
41- logger .info (f"KB Created: { BEDROCK_KB } with Redshift Serverless as data source." )
102+ if __name__ == "__main__" :
103+ setup_structured_kb ()
0 commit comments