From ecbb242d6c715a6d7eb9426b97d99cc3cb93bd40 Mon Sep 17 00:00:00 2001 From: Janelle Tam <64625892+janelletam@users.noreply.github.com> Date: Tue, 26 May 2026 15:39:00 -0400 Subject: [PATCH 1/5] feat(integrations/googledrivekb): add new google drive knowledge base integration (#15213) --- integrations/googledrivekb/eslint.config.mjs | 13 + integrations/googledrivekb/hub.md | 31 ++ integrations/googledrivekb/icon.svg | 8 + .../googledrivekb/integration.definition.ts | 177 ++++++ integrations/googledrivekb/linkTemplate.vrl | 4 + integrations/googledrivekb/package.json | 31 ++ integrations/googledrivekb/src/actions.ts | 100 ++++ integrations/googledrivekb/src/auth.ts | 106 ++++ integrations/googledrivekb/src/client.ts | 514 ++++++++++++++++++ .../googledrivekb/src/error-handling.ts | 63 +++ .../googledrivekb/src/file-channels-cache.ts | 91 ++++ .../googledrivekb/src/file-event-handler.ts | 51 ++ .../src/file-notification-token.ts | 29 + .../googledrivekb/src/files-api-utils.ts | 56 ++ integrations/googledrivekb/src/files-cache.ts | 101 ++++ .../src/files-readonly/actions/index.ts | 11 + .../actions/list-items-in-folder.ts | 199 +++++++ .../actions/transfer-file-to-botpress.ts | 18 + .../google-drive-file-tree.test.ts | 390 +++++++++++++ .../files-readonly/google-drive-file-tree.ts | 303 +++++++++++ integrations/googledrivekb/src/handler.ts | 119 ++++ integrations/googledrivekb/src/index.ts | 19 + integrations/googledrivekb/src/mime-types.ts | 15 + .../googledrivekb/src/notification-handler.ts | 59 ++ integrations/googledrivekb/src/schemas.ts | 169 ++++++ integrations/googledrivekb/src/setup.ts | 2 + integrations/googledrivekb/src/types.ts | 54 ++ integrations/googledrivekb/src/utils.ts | 57 ++ integrations/googledrivekb/src/validation.ts | 146 +++++ integrations/googledrivekb/tsconfig.json | 11 + integrations/googledrivekb/vitest.config.ts | 2 + pnpm-lock.yaml | 47 +- 32 files changed, 2994 insertions(+), 2 deletions(-) create mode 100644 
integrations/googledrivekb/eslint.config.mjs create mode 100644 integrations/googledrivekb/hub.md create mode 100644 integrations/googledrivekb/icon.svg create mode 100644 integrations/googledrivekb/integration.definition.ts create mode 100644 integrations/googledrivekb/linkTemplate.vrl create mode 100644 integrations/googledrivekb/package.json create mode 100644 integrations/googledrivekb/src/actions.ts create mode 100644 integrations/googledrivekb/src/auth.ts create mode 100644 integrations/googledrivekb/src/client.ts create mode 100644 integrations/googledrivekb/src/error-handling.ts create mode 100644 integrations/googledrivekb/src/file-channels-cache.ts create mode 100644 integrations/googledrivekb/src/file-event-handler.ts create mode 100644 integrations/googledrivekb/src/file-notification-token.ts create mode 100644 integrations/googledrivekb/src/files-api-utils.ts create mode 100644 integrations/googledrivekb/src/files-cache.ts create mode 100644 integrations/googledrivekb/src/files-readonly/actions/index.ts create mode 100644 integrations/googledrivekb/src/files-readonly/actions/list-items-in-folder.ts create mode 100644 integrations/googledrivekb/src/files-readonly/actions/transfer-file-to-botpress.ts create mode 100644 integrations/googledrivekb/src/files-readonly/google-drive-file-tree.test.ts create mode 100644 integrations/googledrivekb/src/files-readonly/google-drive-file-tree.ts create mode 100644 integrations/googledrivekb/src/handler.ts create mode 100644 integrations/googledrivekb/src/index.ts create mode 100644 integrations/googledrivekb/src/mime-types.ts create mode 100644 integrations/googledrivekb/src/notification-handler.ts create mode 100644 integrations/googledrivekb/src/schemas.ts create mode 100644 integrations/googledrivekb/src/setup.ts create mode 100644 integrations/googledrivekb/src/types.ts create mode 100644 integrations/googledrivekb/src/utils.ts create mode 100644 integrations/googledrivekb/src/validation.ts create mode 100644 
integrations/googledrivekb/tsconfig.json create mode 100644 integrations/googledrivekb/vitest.config.ts diff --git a/integrations/googledrivekb/eslint.config.mjs b/integrations/googledrivekb/eslint.config.mjs new file mode 100644 index 00000000000..8c81907e7de --- /dev/null +++ b/integrations/googledrivekb/eslint.config.mjs @@ -0,0 +1,13 @@ +import rootConfig from '../../eslint.config.mjs' + +export default [ + ...rootConfig, + { + languageOptions: { + parserOptions: { + project: ['./tsconfig.json'], + tsconfigRootDir: import.meta.dirname, + }, + }, + }, +] diff --git a/integrations/googledrivekb/hub.md b/integrations/googledrivekb/hub.md new file mode 100644 index 00000000000..cf38b1e0b09 --- /dev/null +++ b/integrations/googledrivekb/hub.md @@ -0,0 +1,31 @@ +# Description + +Enable your bot to sync Google Drive files into a Botpress knowledge base. This integration reads and lists all files in your Google Drive and transfers them to the Botpress files API for indexing. + +# Configuration + +This integration requires OAuth authorization to connect your Google Drive account to Botpress. + +## Automatic configuration with OAuth + +Click the authorization button and follow the on-screen instructions. A Botpress-managed Google Drive application with read-only access will be used to connect to your account. + +Actions taken by the bot will be attributed to the user who authorized the connection. **We recommend using a service account** rather than a personal Google Drive account. Share the relevant folders with the service account to control what the knowledge base can access. + +## Configuring the integration in Botpress + +1. Authorize the Google Drive Knowledge Base integration by clicking the authorization button. +2. Follow the on-screen instructions to connect your Botpress chatbot to Google Drive. +3. Once the connection is established, save the configuration and enable the integration. 
+ +# Using the integration + +Use this integration as a knowledge base source. It connects with the **Knowledge Connector** plugin to automatically sync files from Google Drive folders into a Botpress knowledge base. + +Use the `syncChannels` action to maintain subscription channels on all available files and folders. These channels notify your bot when files are created, updated, or deleted. Channels are valid for up to one day — call this action once daily to prevent event loss. + +# Limitations + +Standard Google Drive API limitations apply. These include rate limits, file size restrictions, and other constraints imposed by the Google Drive platform. + +More details are available in the [Google Drive API documentation](https://developers.google.com/drive/api/guides/about-sdk). diff --git a/integrations/googledrivekb/icon.svg b/integrations/googledrivekb/icon.svg new file mode 100644 index 00000000000..a8cefd5b28b --- /dev/null +++ b/integrations/googledrivekb/icon.svg @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/integrations/googledrivekb/integration.definition.ts b/integrations/googledrivekb/integration.definition.ts new file mode 100644 index 00000000000..6e3f6db853d --- /dev/null +++ b/integrations/googledrivekb/integration.definition.ts @@ -0,0 +1,177 @@ +import * as sdk from '@botpress/sdk' +import { sentry as sentryHelpers } from '@botpress/sdk-addons' +import filesReadonly from './bp_modules/files-readonly' +import { + fileSchema, + downloadFileDataArgSchema, + listFoldersOutputSchema, + listFilesOutputSchema, + readFileArgSchema, + listItemsInputSchema, + downloadFileDataOutputSchema, + fileDeletedEventSchema, + folderSchema, + folderDeletedEventSchema, + baseDiscriminatedFileSchema, + fileChannelSchema, +} from './src/schemas' + +// TODO: use default options +const toJSONSchemaOptions: Partial = { + discriminatedUnionStrategy: 'anyOf', + discriminator: false, +} + +export default new sdk.IntegrationDefinition({ + name: 
'googledrivekb', + title: 'Google Drive (Knowledge Base)', + description: 'Sync Google Drive files into a Botpress knowledge base using read-only access to all files.', + version: '0.1.0', + readme: 'hub.md', + icon: 'icon.svg', + attributes: { + category: 'File Management', + repo: 'botpress', + }, + configuration: { + identifier: { + linkTemplateScript: 'linkTemplate.vrl', + }, + schema: sdk.z.object({}), + }, + actions: { + listFiles: { + title: 'List Files', + description: 'List files in Google Drive', + input: { + schema: listItemsInputSchema, + }, + output: { + schema: listFilesOutputSchema, + }, + }, + listFolders: { + title: 'List folders', + description: 'List folders in Google Drive', + input: { + schema: listItemsInputSchema, + }, + output: { + schema: listFoldersOutputSchema, + }, + }, + readFile: { + title: 'Read File', + description: "Read a file's metadata in a Google Drive", + input: { + schema: readFileArgSchema, + }, + output: { + schema: fileSchema.describe('The file read from Google Drive'), + }, + }, + downloadFileData: { + title: 'Download file data', + description: 'Download data from a file in Google Drive', + input: { + schema: downloadFileDataArgSchema, + }, + output: { + schema: downloadFileDataOutputSchema, + }, + }, + syncChannels: { + title: 'Sync Channels', + description: 'Sync channels for file change subscriptions', + input: { + schema: sdk.z.object({}), + }, + output: { + schema: sdk.z.object({}), + }, + }, + }, + events: { + fileCreated: { + title: 'File Created', + description: 'Triggered when a file is created in Google Drive', + schema: fileSchema, + }, + fileDeleted: { + title: 'File Deleted', + description: 'Triggered when a file is deleted in Google Drive', + schema: fileDeletedEventSchema, + }, + folderCreated: { + title: 'Folder Created', + description: 'Triggered when a folder is created in Google Drive', + schema: folderSchema, + }, + folderDeleted: { + title: 'Folder Deleted', + description: 'Triggered when a folder is 
deleted in Google Drive', + schema: folderDeletedEventSchema, + }, + }, + states: { + configuration: { + type: 'integration', + schema: sdk.z.object({ + refreshToken: sdk.z + .string() + .title('Refresh token') + .describe('The refresh token to use to authenticate with Google. It gets exchanged for a bearer token'), + }), + }, + filesCache: { + type: 'integration', + schema: sdk.z.object({ + filesCache: sdk.z + .record(sdk.z.string(), baseDiscriminatedFileSchema) + .title('Files cache') + .describe('Map of known files'), + }), + }, + filesChannelsCache: { + type: 'integration', + schema: sdk.z.object({ + filesChannelsCache: sdk.z + .record(sdk.z.string(), fileChannelSchema) + .title('Files change subscription channels') + .describe('Serialized set of channels for file change subscriptions'), + }), + }, + }, + secrets: { + ...sentryHelpers.COMMON_SECRET_NAMES, + CLIENT_ID: { + description: 'The client ID in your Google Cloud Credentials', + }, + CLIENT_SECRET: { + description: 'The client secret associated with your client ID', + }, + WEBHOOK_SECRET: { + description: 'The secret used to sign webhook tokens. 
Should be a high-entropy string that only Botpress knows', + }, + }, + __advanced: { toJSONSchemaOptions }, +}).extend(filesReadonly, ({}) => ({ + entities: {}, + actions: { + listItemsInFolder: { + name: 'filesReadonlyListItemsInFolder', + attributes: { ...sdk.WELL_KNOWN_ATTRIBUTES.HIDDEN_IN_STUDIO }, + }, + transferFileToBotpress: { + name: 'filesReadonlyTransferFileToBotpress', + attributes: { ...sdk.WELL_KNOWN_ATTRIBUTES.HIDDEN_IN_STUDIO }, + }, + }, + events: { + fileCreated: { name: 'filesReadonlyFileCreated' }, + fileUpdated: { name: 'filesReadonlyFileUpdated' }, + fileDeleted: { name: 'filesReadonlyFileDeleted' }, + folderDeletedRecursive: { name: 'filesReadonlyFolderDeletedRecursive' }, + aggregateFileChanges: { name: 'filesReadonlyAggregateFileChanges' }, + }, +})) diff --git a/integrations/googledrivekb/linkTemplate.vrl b/integrations/googledrivekb/linkTemplate.vrl new file mode 100644 index 00000000000..23372049f7a --- /dev/null +++ b/integrations/googledrivekb/linkTemplate.vrl @@ -0,0 +1,4 @@ +webhookId = to_string!(.webhookId) +webhookUrl = to_string!(.webhookUrl) + +"{{ webhookUrl }}/oauth/wizard/start?state={{ webhookId }}" diff --git a/integrations/googledrivekb/package.json b/integrations/googledrivekb/package.json new file mode 100644 index 00000000000..ebcbe5fe481 --- /dev/null +++ b/integrations/googledrivekb/package.json @@ -0,0 +1,31 @@ +{ + "name": "@botpresshub/googledrivekb", + "scripts": { + "check:type": "tsc --noEmit", + "check:bplint": "bp lint", + "build": "bp add -y && bp build", + "test": "vitest --run" + }, + "private": true, + "dependencies": { + "@botpress/client": "workspace:*", + "@botpress/common": "workspace:*", + "@botpress/sdk": "workspace:*", + "@botpress/sdk-addons": "workspace:*", + "axios": "^1.7.7", + "googleapis": "^144.0.0", + "jsonwebtoken": "^9.0.2", + "uuid": "^9.0.0" + }, + "devDependencies": { + "@botpress/cli": "workspace:*", + "@botpress/sdk": "workspace:*", + "@sentry/cli": "^2.39.1", + "@types/jsonwebtoken": 
"^9.0.3", + "@types/uuid": "^9.0.1", + "preact": "^10.26.6" + }, + "bpDependencies": { + "files-readonly": "../../interfaces/files-readonly" + } +} diff --git a/integrations/googledrivekb/src/actions.ts b/integrations/googledrivekb/src/actions.ts new file mode 100644 index 00000000000..39f3539162e --- /dev/null +++ b/integrations/googledrivekb/src/actions.ts @@ -0,0 +1,100 @@ +import { Client as DriveClient } from './client' +import { wrapWithTryCatch } from './error-handling' +import { FileChannelsCache } from './file-channels-cache' +import { FileEventHandler } from './file-event-handler' +import { downloadToBotpress } from './files-api-utils' +import { FilesCache } from './files-cache' +import { filesReadonlyActions } from './files-readonly/actions' +import * as bp from '.botpress' + +type ActionPropsAndTools = { + driveClient: DriveClient + filesCache: FilesCache + fileChannelsCache: FileChannelsCache + fileEventHandler: FileEventHandler +} & T + +const createActionPropsAndTools = async (props: T): Promise> => { + const { client, ctx, logger } = props + const driveClient = await DriveClient.create({ client, ctx, logger }) + const filesCache = await FilesCache.load({ client, ctx }) + const fileChannelsCache = await FileChannelsCache.load({ client, ctx }) + driveClient.setCache(filesCache) + return { + driveClient, + filesCache, + fileChannelsCache, + fileEventHandler: new FileEventHandler(client, driveClient, filesCache, fileChannelsCache), + ...props, + } +} + +const saveAllCaches = async (props: ActionPropsAndTools) => { + await props.filesCache.save() + await props.fileChannelsCache.save() +} + +const makeSaveAllCachesAndReturnResult = + (props: ActionPropsAndTools) => + async (actionOutput: R) => { + await saveAllCaches(props) + return actionOutput + } + +const listFiles: bp.IntegrationProps['actions']['listFiles'] = wrapWithTryCatch(async (baseProps) => { + const props = await createActionPropsAndTools(baseProps) + const { driveClient, input } = props + 
const saveAllCachesAndReturnResult = makeSaveAllCachesAndReturnResult(props) + return await driveClient.listFiles(input).then(saveAllCachesAndReturnResult) +}, 'Error listing files') + +const listFolders: bp.IntegrationProps['actions']['listFolders'] = wrapWithTryCatch(async (baseProps) => { + const props = await createActionPropsAndTools(baseProps) + const { driveClient, input } = props + const saveAllCachesAndReturnResult = makeSaveAllCachesAndReturnResult(props) + return await driveClient.listFolders(input).then(saveAllCachesAndReturnResult) +}, 'Error listing folders') + +const readFile: bp.IntegrationProps['actions']['readFile'] = wrapWithTryCatch(async (baseProps) => { + const props = await createActionPropsAndTools(baseProps) + const { driveClient, input } = props + const saveAllCachesAndReturnResult = makeSaveAllCachesAndReturnResult(props) + return await driveClient.readFile(input.id).then(saveAllCachesAndReturnResult) +}, 'Error reading file') + +const downloadFileData: bp.IntegrationProps['actions']['downloadFileData'] = wrapWithTryCatch(async (baseProps) => { + const props = await createActionPropsAndTools(baseProps) + const { driveClient, input } = props + const { id, index } = input + + const { botpressFileId, botpressFileUrl } = await downloadToBotpress({ + botpressFileKey: id, + googleDriveFileId: id, + client: props.client, + driveClient, + indexFile: index, + }) + + await saveAllCaches(props) + return { bpFileId: botpressFileId, url: botpressFileUrl } +}, 'Error downloading file') + +const syncChannels: bp.IntegrationProps['actions']['syncChannels'] = wrapWithTryCatch(async (baseProps) => { + const props = await createActionPropsAndTools(baseProps) + const { driveClient, fileChannelsCache } = props + const { fileChannels: newChannels } = await driveClient.tryWatchAll() + const oldChannels = fileChannelsCache.setAll(newChannels) + await fileChannelsCache.save() + await driveClient.tryUnwatch(oldChannels) + return {} +}, 'Error syncing channels') + 
+export default { + listFiles, + listFolders, + readFile, + downloadFileData, + syncChannels, + + ...filesReadonlyActions, +} as const satisfies bp.IntegrationProps['actions'] diff --git a/integrations/googledrivekb/src/auth.ts b/integrations/googledrivekb/src/auth.ts new file mode 100644 index 00000000000..4c7da9680f4 --- /dev/null +++ b/integrations/googledrivekb/src/auth.ts @@ -0,0 +1,106 @@ +import * as oauthWizard from '@botpress/common/src/oauth-wizard' +import * as sdk from '@botpress/sdk' +import { google } from 'googleapis' +import { GoogleOAuth2Client, GoogleDriveClient } from './types' +import * as bp from '.botpress' + +export const getAuthenticatedGoogleClient = async ({ + client, + ctx, +}: { + client: bp.Client + ctx: bp.Context +}): Promise => { + const oauth2Client = await _getAuthenticatedOAuthClient({ client, ctx }) + + return google.drive({ version: 'v3', auth: oauth2Client }) +} + +export const getAccessToken = async (props: { client: bp.Client; ctx: bp.Context }) => { + const oauth2Client = await _getAuthenticatedOAuthClient(props) + const { token } = await oauth2Client.getAccessToken() + + if (!token) { + throw new sdk.RuntimeError('Unable to obtain access token. 
Please try the OAuth flow again.') + } + return token +} + +/** + * @return The updated refresh token + */ +export const updateRefreshTokenFromAuthorizationCode = async ({ + authorizationCode, + client, + ctx, +}: { + authorizationCode: string + client: bp.Client + ctx: bp.Context +}): Promise => { + const refreshToken = await exchangeAuthorizationCodeForRefreshToken(authorizationCode) + await _saveRefreshTokenIntoStates({ client, ctx, refreshToken }) + return refreshToken +} + +const _getAuthenticatedOAuthClient = async ({ + client, + ctx, +}: { + client: bp.Client + ctx: bp.Context +}): Promise => { + const token = await _getRefreshTokenFromStates({ client, ctx }) + + const oauth2Client = _getOAuthClient() + oauth2Client.setCredentials({ refresh_token: token }) + + return oauth2Client +} + +const _getOAuthClient = (): GoogleOAuth2Client => + new google.auth.OAuth2( + bp.secrets.CLIENT_ID, + bp.secrets.CLIENT_SECRET, + oauthWizard.getWizardStepUrl('oauth-callback').href + ) + +const _getRefreshTokenFromStates = async ({ client, ctx }: { client: bp.Client; ctx: bp.Context }) => { + const { state } = await client.getState({ + type: 'integration', + name: 'configuration', + id: ctx.integrationId, + }) + + return state.payload.refreshToken +} + +const _saveRefreshTokenIntoStates = async ({ + client, + ctx, + refreshToken, +}: { + client: bp.Client + ctx: bp.Context + refreshToken: string +}) => { + await client.setState({ + type: 'integration', + name: 'configuration', + id: ctx.integrationId, + payload: { refreshToken }, + }) +} + +const exchangeAuthorizationCodeForRefreshToken = async (authorizationCode: string) => { + const oauth2Client = _getOAuthClient() + const { tokens } = await oauth2Client.getToken({ + code: authorizationCode, + }) + + if (!tokens.refresh_token) { + throw new sdk.RuntimeError('Unable to obtain refresh token. 
Please try the OAuth flow again.') + } + + return tokens.refresh_token +} diff --git a/integrations/googledrivekb/src/client.ts b/integrations/googledrivekb/src/client.ts new file mode 100644 index 00000000000..47c1d8d87e8 --- /dev/null +++ b/integrations/googledrivekb/src/client.ts @@ -0,0 +1,514 @@ +import { RuntimeError } from '@botpress/sdk' +import { Readable } from 'stream' +import { v4 as uuidv4 } from 'uuid' +import { getAuthenticatedGoogleClient } from './auth' +import { + handleNotFoundError, + handleRateLimitError, + isGaxiosError, + isSubscriptionRateLimitError, +} from './error-handling' +import { serializeToken } from './file-notification-token' +import { FilesCache } from './files-cache' +import { APP_GOOGLE_FOLDER_MIMETYPE, APP_GOOGLE_SHORTCUT_MIMETYPE, INDEXABLE_MIMETYPES } from './mime-types' +import { + BaseDiscriminatedFile, + GoogleDriveClient, + BaseNormalFile, + File, + BaseFolderFile, + Folder, + ListFilesOutput, + ListFoldersOutput, + ListItemsInput, + ListItemsOutput, + BaseGenericFileUnion, + FileChannel, + GenericFile, +} from './types' +import { listItemsAndProcess, ListFunction, streamToBuffer, ListItemsInputWithArgs, listAllItems } from './utils' +import { getFileTypeFromMimeType, parseChannel, parseBaseGeneric, parseBaseGenerics } from './validation' +import * as bp from '.botpress' + +type DownloadFileDataClientOutput = { + mimeType: string + dataSize: number +} & ( + | { + dataType: 'buffer' + data: Buffer + } + | { + dataType: 'stream' + data: Readable + } +) +type TryWatchAllOutput = { + fileChannels: FileChannel[] + hasError: boolean +} + +const MAX_RESOURCE_WATCH_EXPIRATION_DELAY_MS = 86400 * 1000 // 24 hours +const MAX_EXPORT_FILE_SIZE_BYTES = 10000000 // 10MB, as per the Google Drive API doc +const MYDRIVE_ID_ALIAS = 'root' +const PAGE_SIZE = 100 +const GOOGLE_API_EXPORTFORMATS_FIELDS = 'exportFormats' +const GOOGLE_API_FILE_FIELDS = + 'id, name, mimeType, parents, size, sha256Checksum, md5Checksum, version, trashed, 
modifiedTime, driveId, sharedWithMeTime' +const GOOGLE_API_FILELIST_FIELDS = `files(${GOOGLE_API_FILE_FIELDS}), nextPageToken` + +const INCLUDE_FILES_FROM_ALL_DRIVES = { + includeItemsFromAllDrives: true, + supportsAllDrives: true, +} as const + +export class Client { + private constructor( + private _ctx: bp.Context, + private _googleClient: GoogleDriveClient, + private _filesCache: FilesCache, + private _logger: bp.Logger + ) {} + + public static async create({ + client, + ctx, + logger, + }: { + client: bp.Client + ctx: bp.Context + logger: bp.Logger + }): Promise { + const googleClient = await getAuthenticatedGoogleClient({ + client, + ctx, + }) + const filesCache = new FilesCache(client, ctx) + return new Client(ctx, googleClient, filesCache, logger) + } + + public setCache(filesCache: FilesCache) { + this._filesCache = filesCache + } + + public async getRootFolderId(): Promise { + try { + const response = await this._googleClient.files.get({ fileId: MYDRIVE_ID_ALIAS }) + return response.data.id! 
+ } catch (thrown: unknown) { + if (isGaxiosError(thrown) && thrown.toString().includes('File not found: ')) { + return thrown.toString().split('File not found: ')[1]!.slice(0, -1) + } + throw thrown + } + } + + public async listFiles({ nextToken }: ListItemsInput): Promise { + const { items: baseFiles, meta } = await this._listBaseNormalFiles({ nextToken }) + const completeFilesPromises = baseFiles.map((f) => this._getCompleteFileFromBaseFile(f)) + const items = await Promise.all(completeFilesPromises) + return { + items, + meta, + } + } + + private async _listBaseNormalFiles({ nextToken }: ListItemsInput): Promise> { + const { + items: newFiles, + meta: { nextToken: newNextToken }, + } = await this._listBaseGenericFiles({ + nextToken, + args: { + searchQuery: `mimeType != '${APP_GOOGLE_FOLDER_MIMETYPE}' and mimeType != '${APP_GOOGLE_SHORTCUT_MIMETYPE}'`, + }, + }) + + const items = newFiles.filter((f) => f.type === 'normal') + return { + items, + meta: { + nextToken: newNextToken, + }, + } + } + + public async listFolders({ nextToken }: ListItemsInput): Promise { + const { items: baseFolders, meta } = await this._listBaseFolderFiles({ nextToken }) + + const completeFoldersPromises = baseFolders.map((f) => this._getCompleteFolderFromBaseFolder(f)) + const items = await Promise.all(completeFoldersPromises) + return { + items, + meta, + } + } + + private async _listBaseFolderFiles({ nextToken }: ListItemsInput): Promise> { + const { + items: newFiles, + meta: { nextToken: newNextToken }, + } = await this._listBaseGenericFiles({ + nextToken, + args: { + searchQuery: `mimeType = '${APP_GOOGLE_FOLDER_MIMETYPE}'`, + }, + }) + if (nextToken === undefined) { + // My Drive is not returned by list operation but needs to be part of list, so we add it to first page + const myDriveFile = await this._fetchFile(MYDRIVE_ID_ALIAS) + newFiles.push(myDriveFile) + } + const items = newFiles.filter((f) => f.type === 'folder') + return { + items, + meta: { + nextToken: newNextToken, + 
}, + } + } + + public async getChildren(folderId: string): Promise { + const files = await listAllItems(this._listBaseGenericFiles.bind(this), { + searchQuery: this._getParentsFilter(folderId), + }) + return await Promise.all(files.map((f) => this._getCompleteFile(f))) + } + + public async getChildrenSubset({ + folderId, + extraQuery, + nextToken, + }: { + folderId: string + extraQuery?: string + nextToken?: string + }) { + const searchQuery = this._getParentsFilter(folderId) + (extraQuery ? ` and ${extraQuery}` : '') + const listResponse = await this._googleClient.files.list({ + corpora: 'user', + fields: GOOGLE_API_FILELIST_FIELDS, + q: `${searchQuery} and trashed != true`, + pageToken: nextToken, + pageSize: PAGE_SIZE, + spaces: 'drive', + ...INCLUDE_FILES_FROM_ALL_DRIVES, + }) + return { files: listResponse.data.files, nextToken: listResponse.data.nextPageToken ?? undefined } + } + + private _getParentsFilter(parentId: string): string { + return parentId === MYDRIVE_ID_ALIAS ? 'not trashed' : `'${parentId}' in parents` + } + + public async readGenericFile(id: string): Promise { + const file = await this._fetchFile(id) + return await this._getCompleteFile(file) + } + + public async readFile(id: string): Promise { + const file = await this._fetchFile(id) + if (file.type !== 'normal') { + throw new RuntimeError(`Attempted to read a file of type ${file.type}`) + } + return await this._getCompleteFileFromBaseFile(file) + } + + public async downloadFileData({ id }: { id: string }): Promise { + const file = await this._fetchFile(id) + if (file.type !== 'normal') { + throw new RuntimeError(`Attempted to download a file of type ${file.type}`) + } + + const exportType = await this._findExportType(file.mimeType) + let output: DownloadFileDataClientOutput + if (exportType) { + // File size is unknown when exporting, download all data to buffer to know size + const fileDownloadStream = await this._exportFileData(file, exportType) + const buffer = await 
streamToBuffer(fileDownloadStream, MAX_EXPORT_FILE_SIZE_BYTES) + output = { + mimeType: exportType, + dataSize: buffer.length, + dataType: 'buffer', + data: buffer, + } + } else { + output = { + mimeType: file.mimeType, + dataSize: file.size, + dataType: 'stream', + data: await this._fetchFileData(file), + } + } + return output + } + + private _getRateLimitErrorHandler(): (error: unknown) => Promise { + return async (error: unknown) => { + return handleRateLimitError(error, this._logger) + } + } + + private _getNotFoundErrorHandler(): (error: unknown) => Promise { + return async (error: unknown) => { + return handleNotFoundError(error, this._logger) + } + } + + private async _tryWatchAllListableGenericFiles( + listFn: ListFunction + ): Promise { + const fileChannels: FileChannel[] = [] + let hasError = false + await listItemsAndProcess(listFn, async (item) => { + const channel = await this._watch(item).catch(async (error: unknown) => { + if (isSubscriptionRateLimitError(error)) { + this._logger.forBot().warn('Subscription rate limit exceeded. 
Retry operation later.') + } else { + this._logger.forBot().warn(`Failed to subscribe to changes for '${item.name}' (${item.id})`) + } + return undefined + }) + if (channel) { + fileChannels.push(channel) + } else { + hasError = true + } + }) + return { + fileChannels, + hasError, + } + } + + public async watch(id: string): Promise { + const file = await this._fetchFile(id) + return await this._watch(file) + } + + /** + * @returns Channel if successful, undefined if the subscription rate limit is exceeded + */ + public async tryWatch(id: string): Promise { + return await this.watch(id).catch(this._getRateLimitErrorHandler()) + } + + private async _watch(file: BaseGenericFileUnion): Promise { + const absoluteExpirationTimeMs: number = Date.now() + MAX_RESOURCE_WATCH_EXPIRATION_DELAY_MS + const { id: fileId, mimeType } = file + const token = serializeToken( + { + fileId, + fileType: getFileTypeFromMimeType(mimeType), + }, + bp.secrets.WEBHOOK_SECRET + ) + const response = await this._googleClient.files.watch({ + fileId, + requestBody: { + id: uuidv4(), + type: 'web_hook', + address: `${process.env.BP_WEBHOOK_URL}/${this._ctx.webhookId}`, + token, + expiration: absoluteExpirationTimeMs.toString(), + }, + }) + const baseChannel = parseChannel(response.data) + this._logger.forBot().debug(`Watching file '${file.name}' (${file.id}): channel ID = ${baseChannel.id}`) + return { + ...baseChannel, + fileId, + } + } + + public async tryWatchAllFiles(): Promise { + return await this._tryWatchAllListableGenericFiles(this._listBaseNormalFiles.bind(this)) + } + + public async tryWatchAllFolders(): Promise { + return await this._tryWatchAllListableGenericFiles(this._listBaseFolderFiles.bind(this)) + } + + public async tryWatchAll(): Promise { + const [filesResult, foldersResult] = await Promise.all([this.tryWatchAllFiles(), this.tryWatchAllFolders()]) + return { + fileChannels: [...filesResult.fileChannels, ...foldersResult.fileChannels], + hasError: filesResult.hasError || 
foldersResult.hasError, + } + } + + public async unwatch(channels: FileChannel | FileChannel[]) { + if (!Array.isArray(channels)) { + channels = [channels] + } + const unwatchPromises = channels.map((channel) => { + const fileName = this._filesCache.find(channel.fileId)?.name ?? '[unknown]' + this._logger.forBot().debug(`Unwatching file ${fileName} (${channel.fileId}) with channel ID = ${channel.id}`) + const { id, resourceId } = channel + return this._googleClient.channels.stop({ + requestBody: { + id, + resourceId, + }, + }) + }) + await Promise.all(unwatchPromises) + } + + public async tryUnwatch(channels: FileChannel | FileChannel[]) { + await this.unwatch(channels).catch(this._getNotFoundErrorHandler()) + } + + /** + * Removes internal fields and adds computed attributes + */ + private async _getCompleteFileFromBaseFile(file: BaseNormalFile): Promise { + return { + ...file, + path: await this._getFilePath({ type: 'normal', ...file }), + } + } + + /** + * Removes internal fields and adds computed attributes + */ + private async _getCompleteFolderFromBaseFolder(file: BaseFolderFile): Promise { + const { id, mimeType, name, parentId } = file + return { + id, + mimeType, + name, + parentId, + path: await this._getFilePath({ type: 'folder', ...file }), + } + } + + private async _getCompleteFile(file: BaseDiscriminatedFile): Promise { + return { + ...file, + path: await this._getFilePath(file), + } + } + + private async _listBaseGenericFiles({ + nextToken, + args, + }: ListItemsInputWithArgs<{ searchQuery?: string }>): Promise> { + const searchQuery = args?.searchQuery + const listResponse = await this._googleClient.files.list({ + corpora: 'user', + fields: GOOGLE_API_FILELIST_FIELDS, + q: (searchQuery ?? '') + (searchQuery?.length ? ' and ' : '') + 'trashed != true', + pageToken: nextToken, + pageSize: PAGE_SIZE, + spaces: 'drive', + ...INCLUDE_FILES_FROM_ALL_DRIVES, + }) + + const newNextToken = listResponse.data.nextPageToken ?? 
/**
 * Fetches the export-format map from the Drive `about` resource.
 *
 * @returns A map from a source MIME type to the list of MIME types it can be
 *   exported to.
 * @throws {RuntimeError} When the API response carries no `exportFormats`.
 */
private async _fetchExportFormatMap(): Promise<Record<string, string[]>> {
  const response = await this._googleClient.about.get({
    fields: GOOGLE_API_EXPORTFORMATS_FIELDS,
  })
  const { exportFormats } = response.data
  if (!exportFormats) {
    throw new RuntimeError('Export formats are missing in Schema$About from the API response')
  }
  return exportFormats
}
/**
 * Builds the path of a file as a list of names, outermost ancestor first.
 *
 * Walks up the `parentId` chain, resolving each parent through
 * `_getOrFetchFile`, and prepends each name to the accumulator.
 *
 * @param file - File whose path is being resolved.
 * @param pathAcc - Names already collected for descendants (used by the recursion).
 * @returns The names from the highest resolvable ancestor down to `file`.
 */
private _getFilePath = async (file: BaseDiscriminatedFile, pathAcc?: string[]): Promise<string[]> => {
  const path = [file.name, ...(pathAcc ?? [])]

  if (!file.parentId) {
    return path
  }

  try {
    const parent = await this._getOrFetchFile(file.parentId)

    return await this._getFilePath(parent, path)
  } catch {
    // Best effort: if a parent cannot be resolved, return the partial path
    // collected so far instead of failing the whole operation.
    return path
  }
}
/** A Gaxios error that also carries the `errors` detail list. */
export type AggregateGAxiosError = Common.GaxiosError & { errors: ErrorDetail[] }

/**
 * Type guard: true when `error` is an `Error` instance with an `errors` array
 * whose every entry matches `errorDetailSchema`.
 */
export const isGaxiosError = (error: unknown): error is AggregateGAxiosError => {
  if (!(error instanceof Error) || !('errors' in error)) {
    return false
  }
  const details = error['errors']
  return Array.isArray(details) && details.every((detail) => errorDetailSchema.safeParse(detail).success)
}

export type SubscriptionRateLimitError = AggregateGAxiosError // No discriminant
const SUBSCRIPTION_RATE_LIMIT_ERR_REASON = 'subscriptionRateLimitExceeded'

/**
 * Type guard: true for a 403 error with at least one detail whose reason is
 * `subscriptionRateLimitExceeded`.
 */
export const isSubscriptionRateLimitError = (error: unknown): error is SubscriptionRateLimitError =>
  isGaxiosError(error) &&
  error.status === 403 &&
  error.errors.some(({ reason }) => reason === SUBSCRIPTION_RATE_LIMIT_ERR_REASON)

export type NotFoundError = AggregateGAxiosError // No discriminant

/** Type guard: true for an aggregate Gaxios error with status 404. */
export const isNotFoundError = (error: unknown): error is NotFoundError =>
  isGaxiosError(error) && error.status === 404
/**
 * In-memory map of notification channels keyed by file ID, persisted in the
 * `filesChannelsCache` integration state.
 *
 * Mutations only mark the cache dirty; nothing is written until `save()` is
 * called, and `save()` is a no-op when nothing changed.
 */
export class FileChannelsCache {
  private _channels: FileChannels
  private _dirty = false

  public constructor(
    private _client: bp.Client,
    private _ctx: bp.Context
  ) {
    this._channels = FileChannelsCache._getEmpty()
  }

  /** Drops every channel and marks the cache dirty. */
  public clear() {
    this._channels = FileChannelsCache._getEmpty()
    this._dirty = true
  }

  /**
   * Loads the cache from the integration state, initializing the state with
   * an empty map when it does not exist yet.
   */
  public static async load({ client, ctx }: { client: bp.Client; ctx: bp.Context }) {
    const getStateResponse = await client.getOrSetState({
      id: ctx.integrationId,
      type: 'integration',
      name: 'filesChannelsCache',
      payload: {
        filesChannelsCache: this._getEmpty(),
      },
    })
    const fileChannels = new FileChannelsCache(client, ctx)
    fileChannels._channels = getStateResponse.state.payload.filesChannelsCache
    fileChannels._dirty = false
    return fileChannels
  }

  /**
   * Persists the cache if it has unsaved changes.
   *
   * @returns The `setState` response, or `undefined` when there was nothing to save.
   * @throws Whatever `setState` throws; the cache stays dirty in that case so
   *   that a later `save()` retries the write.
   */
  public async save() {
    if (!this._dirty) {
      return
    }

    // Clear the flag before awaiting so that a mutation made while the write
    // is in flight marks the cache dirty again and is not lost.
    this._dirty = false
    try {
      return await this._client.setState({
        id: this._ctx.integrationId,
        type: 'integration',
        name: 'filesChannelsCache',
        payload: {
          filesChannelsCache: this._channels,
        },
      })
    } catch (thrown: unknown) {
      // The write failed: the pending changes are still unsaved.
      this._dirty = true
      throw thrown
    }
  }

  private static _getEmpty(): FileChannels {
    return {}
  }

  /**
   * @returns Channel that was removed, or undefined if none was registered for the file
   */
  public remove(fileId: string): FileChannel | undefined {
    const channel = this._channels[fileId]
    if (channel !== undefined) {
      delete this._channels[fileId]
      this._dirty = true
    }
    return channel
  }

  /**
   * @returns Channel that was replaced
   */
  public set(channel: FileChannel): FileChannel | undefined {
    const oldChannel = this._channels[channel.fileId]
    this._channels[channel.fileId] = channel
    this._dirty = true
    return oldChannel
  }

  /**
   * Replaces the whole cache content with the given channels.
   *
   * @returns Channels that were replaced
   */
  public setAll(channels: FileChannelsArray): FileChannelsArray {
    const newChannels = Object.fromEntries(channels.map((channel) => [channel.fileId, channel]))
    const oldChannels = { ...this._channels }
    this._channels = newChannels
    this._dirty = true
    return Object.values(oldChannels)
  }

  public getAll(): FileChannelsArray {
    return Object.values(this._channels)
  }
}
const tokenSchema = z.object({
  fileId: z.string().min(1),
  fileType: fileTypesUnionSchema,
})
export type Token = z.infer<typeof tokenSchema>

// HS256 is jsonwebtoken's default for `sign`; it is spelled out so that
// signing and verification are guaranteed to agree.
const TOKEN_SIGNING_ALGORITHM = 'HS256'

/**
 * Signs a notification token as a JWT without an `iat` claim.
 *
 * @param token - Payload identifying the watched file.
 * @param secret - Shared secret used to sign the token.
 */
export const serializeToken = (token: Token, secret: string): string => {
  return jwt.sign(token, secret, {
    algorithm: TOKEN_SIGNING_ALGORITHM,
    noTimestamp: true,
  })
}

/**
 * Verifies and parses a serialized notification token.
 *
 * @returns The token payload, or `undefined` when the signature check fails
 *   or the payload does not match the token schema.
 */
export const deserializeToken = (serializedToken: string, secret: string): Token | undefined => {
  let object: unknown
  try {
    // Only accept the algorithm used by `serializeToken`.
    object = jwt.verify(serializedToken, secret, { algorithms: [TOKEN_SIGNING_ALGORITHM] })
  } catch {
    return undefined
  }
  const tokenParseResult = tokenSchema.safeParse(object)
  if (!tokenParseResult.success) {
    return undefined
  }
  return tokenParseResult.data
}
/**
 * Downloads a Google Drive file and stores it in the Botpress Files API.
 *
 * Content reported as a stream is uploaded with an HTTP PUT to the upload URL
 * returned by `upsertFile`; any other content is sent directly through
 * `uploadFile`.
 *
 * @returns The ID and URL of the Botpress file.
 * @throws {sdk.RuntimeError} When the streamed upload request fails.
 */
export const downloadToBotpress = async ({
  client,
  driveClient,
  botpressFileKey,
  googleDriveFileId,
  indexFile,
}: {
  client: bp.Client
  driveClient: DriveClient
  googleDriveFileId: string
  botpressFileKey: string
  indexFile?: boolean
}) => {
  const { mimeType, dataSize, dataType, data } = await driveClient.downloadFileData({ id: googleDriveFileId })
  const sharedUploadArgs = {
    key: botpressFileKey,
    contentType: mimeType,
    index: indexFile ?? false,
  }

  if (dataType !== 'stream') {
    const { file: uploadedFile } = await client.uploadFile({
      ...sharedUploadArgs,
      content: data,
    })
    return { botpressFileId: uploadedFile.id, botpressFileUrl: uploadedFile.url }
  }

  // Streamed content: declare the file first, then push the bytes to its upload URL.
  const { file: upsertedFile } = await client.upsertFile({
    ...sharedUploadArgs,
    size: dataSize,
  })
  try {
    await axios.put(upsertedFile.uploadUrl, data, {
      maxBodyLength: dataSize,
      headers: {
        'Content-Type': mimeType,
        'Content-Length': dataSize,
      },
    })
  } catch (thrown: unknown) {
    throw new sdk.RuntimeError(`Error uploading file stream: ${thrown}`)
  }

  return { botpressFileId: upsertedFile.id, botpressFileUrl: upsertedFile.url }
}
/**
 * In-memory map of Drive files keyed by file ID, persisted in the
 * `filesCache` integration state.
 */
export class FilesCache {
  private _map: FilesMap

  public constructor(
    private _client: bp.Client,
    private _ctx: bp.Context
  ) {
    this._map = FilesCache._getEmpty()
  }

  /** Drops every cached file (in memory only, until `save()` is called). */
  public clear() {
    this._map = FilesCache._getEmpty()
  }

  /**
   * Loads the cache from the integration state, initializing the state with
   * an empty map when it does not exist yet.
   */
  public static async load({ client, ctx }: { client: bp.Client; ctx: bp.Context }): Promise<FilesCache> {
    const getStateResponse = await client.getOrSetState({
      id: ctx.integrationId,
      type: 'integration',
      name: 'filesCache',
      payload: {
        filesCache: FilesCache._getEmpty(),
      },
    })
    const cache = new FilesCache(client, ctx)
    cache._map = getStateResponse.state.payload.filesCache
    return cache
  }

  /** Writes the current map to the integration state. */
  public async save() {
    await this._client.setState({
      id: this._ctx.integrationId,
      type: 'integration',
      name: 'filesCache',
      payload: {
        filesCache: this._map,
      },
    })
  }

  private static _getEmpty(): FilesMap {
    return {}
  }

  /** @returns The cached file, or undefined when the ID is unknown */
  public find(id: string): BaseDiscriminatedFile | undefined {
    return this._map[id]
  }

  public set(file: BaseDiscriminatedFile) {
    this._map[file.id] = file
  }

  public remove(id: string) {
    delete this._map[id]
  }

  private _getGenericFile(id: string): BaseDiscriminatedFile {
    const file = this._map[id]
    if (!file) {
      throw new RuntimeError(`Couldn't get file from files map with ID=${id}`)
    }
    return file
  }

  /**
   * @throws {RuntimeError} ID must correspond to a cached file
   */
  public get(id: string): BaseDiscriminatedFile {
    return this._getGenericFile(id)
  }

  /** @returns Every cached file, optionally restricted to those accepted by `filterFn` */
  public getAll(filterFn?: (file: BaseDiscriminatedFile) => boolean): BaseDiscriminatedFile[] {
    const allFiles = Object.values(this._map)
    return filterFn ? allFiles.filter(filterFn) : allFiles
  }

  /**
   * @throws {RuntimeError} ID must correspond to a file compatible with BaseNormalFile
   */
  public getFile(id: string): BaseNormalFile {
    const file = this._getGenericFile(id)
    if (file.type !== 'normal') {
      throw new RuntimeError(`Attempted to get file with ID=${file.id} as a normal file but is type ${file.type}`)
    }
    return file
  }

  /**
   * @throws {RuntimeError} ID must correspond to a file compatible with BaseFolderFile
   */
  public getFolder(id: string): BaseFolderFile {
    const file = this._getGenericFile(id)
    if (file.type !== 'folder') {
      throw new RuntimeError(`Attempted to get file with ID=${file.id} as a folder file but is type ${file.type}`)
    }
    return file
  }
}
/**
 * Returns one page of a node's children, using a synthetic pagination token
 * of the form `<SYNTHETIC_NEXT_TOKEN_PREFIX><start index>`.
 *
 * A missing token starts at index 0. A token that does not carry the expected
 * prefix followed by a non-negative integer is treated as pointing past the
 * end, so it yields an empty final page instead of an arbitrary slice.
 *
 * @param node - Node whose children are listed; a missing node has no children.
 * @param nextToken - Synthetic token returned by a previous call.
 * @returns Up to `SYNTHETIC_BATCH_SIZE` mapped items and the token of the
 *   following page, or `undefined` when this is the last page.
 */
const _enumerateNodeChildren = ({ node, nextToken }: { node?: GoogleDriveNode; nextToken?: string }) => {
  const nodeChildren = node?.children ?? []

  let batchChildIndex = 0
  if (nextToken !== undefined) {
    const rawIndex = nextToken.startsWith(SYNTHETIC_NEXT_TOKEN_PREFIX)
      ? nextToken.slice(SYNTHETIC_NEXT_TOKEN_PREFIX.length)
      : ''
    // Digits only: rejects negative numbers, which `slice` would interpret
    // as offsets from the end of the array.
    batchChildIndex = /^\d+$/.test(rawIndex) ? parseInt(rawIndex, 10) : Number.NaN
  }

  if (!Number.isSafeInteger(batchChildIndex) || batchChildIndex >= nodeChildren.length) {
    return { items: [], meta: { nextToken: undefined } }
  }

  const nextBatchIndex = batchChildIndex + SYNTHETIC_BATCH_SIZE
  const currentBatch = nodeChildren.slice(batchChildIndex, nextBatchIndex)
  const mappedBatchItems = currentBatch.map(_mapNodeToBatchItem)

  return {
    items: mappedBatchItems,
    meta: {
      nextToken: nextBatchIndex < nodeChildren.length ? _getNextTokenForChildIndex(nextBatchIndex) : undefined,
    },
  }
}
/**
 * Translates the action's filters into a Google Drive search query fragment.
 *
 * Shortcuts are always excluded unless only folders are requested. The
 * resulting clauses are joined with ` and `.
 *
 * @param filters - Optional item type, maximum size and modification date filters.
 * @returns The query fragment; never empty, since a MIME type clause is always present.
 */
const _buildFilterString = (filters: FilesReadonlyListItemsInFolderProps['input']['filters']): string => {
  const query: string[] = []

  if (filters?.itemType === 'file') {
    query.push(`mimeType != '${APP_GOOGLE_FOLDER_MIMETYPE}'`, `mimeType != '${APP_GOOGLE_SHORTCUT_MIMETYPE}'`)
  } else if (filters?.itemType === 'folder') {
    query.push(`mimeType = '${APP_GOOGLE_FOLDER_MIMETYPE}'`)
  } else {
    query.push(`mimeType != '${APP_GOOGLE_SHORTCUT_MIMETYPE}'`)
  }

  // Explicit type check instead of truthiness: a limit of 0 is a valid filter.
  if (typeof filters?.maxSizeInBytes === 'number') {
    query.push(`size <= ${filters.maxSizeInBytes}`)
  }

  if (filters?.modifiedAfter) {
    // Drive query strings are delimited by single quotes: escape backslashes
    // and quotes so the value cannot terminate the literal early.
    const escapedModifiedAfter = String(filters.modifiedAfter).replace(/[\\']/g, '\\$&')
    query.push(`modifiedTime > '${escapedModifiedAfter}'`)
  }

  return query.join(' and ')
}
/**
 * Persists the node tree as a JSON file in the Botpress Files API, under the
 * fixed key `GOOGLE_DRIVE_TREE_FILE_KEY`.
 *
 * @param client - Botpress client used for the upload.
 * @param nodeTree - Tree to serialize with `toJSON()`.
 */
const _saveNodeTree = async (client: bp.Client, nodeTree: GoogleDriveNodeTree): Promise<void> => {
  await client.uploadFile({
    key: GOOGLE_DRIVE_TREE_FILE_KEY,
    content: nodeTree.toJSON(),
  })
}
/**
 * Builds a plain-text file node for tests; any field can be overridden.
 */
const _createMockFile = (overrides: Partial<GoogleDriveNode> = {}): GoogleDriveNode => ({
  id: 'example123',
  name: 'example.txt',
  mimeType: 'text/plain',
  trashed: false,
  version: '1',
  modifiedTime: '2025-01-01T00:00:00.000Z',
  shared: false,
  ...overrides,
})

/**
 * Builds a folder node for tests; any field can be overridden.
 */
const _createMockFolder = (overrides: Partial<GoogleDriveNode> = {}): GoogleDriveNode => ({
  id: 'folder123',
  name: 'Example Folder',
  mimeType: APP_GOOGLE_FOLDER_MIMETYPE,
  trashed: false,
  version: '1',
  modifiedTime: '2025-01-01T00:00:00.000Z',
  shared: false,
  ...overrides,
})
name: `[${parentId}]`, + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [nestedFolder], + }), + ], + }) + }) + + it("should correctly add a folder that's a direct child of a shared drive", () => { + // Arrange + const tree = _createMockTree() + const sharedDriveFolder = _createMockFolder({ + parents: [DUMMY_SHARED_DRIVE_ID], + id: '1shared123', + name: 'Team Documents', + driveId: DUMMY_SHARED_DRIVE_ID, + }) + + // Act + tree.upsertNode(sharedDriveFolder) + const root = tree.getRootNode() + + // Assert + expect(root).toMatchObject({ + children: [ + expect.objectContaining({ + id: SHARED_DRIVES_ID, + name: 'Shared drives', + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [ + expect.objectContaining({ + id: DUMMY_SHARED_DRIVE_ID, + name: `[${DUMMY_SHARED_DRIVE_ID}]`, + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [expect.objectContaining(sharedDriveFolder)], + }), + ], + }), + ], + }) + }) + + it("should correctly add a folder that's a nested child of a shared drive", () => { + // Arrange + const tree = _createMockTree() + const parentId = '1sharedparent123' + const sharedDriveFolder = _createMockFolder({ + parents: [parentId], + id: '1sharednested123', + name: 'Project Files', + driveId: DUMMY_SHARED_DRIVE_ID, + }) + + // Act + tree.upsertNode(sharedDriveFolder) + const root = tree.getRootNode() + + // Assert + expect(root).toMatchObject({ + children: [ + expect.objectContaining({ + id: SHARED_DRIVES_ID, + name: 'Shared drives', + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [ + expect.objectContaining({ + id: DUMMY_SHARED_DRIVE_ID, + name: `[${DUMMY_SHARED_DRIVE_ID}]`, + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [ + expect.objectContaining({ + id: parentId, + name: `[${parentId}]`, + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [expect.objectContaining(sharedDriveFolder)], + }), + ], + }), + ], + }), + ], + }) + }) + + it("should correctly add a folder that's a direct child of 'Shared with me'", () => { + // Arrange + const tree = 
_createMockTree() + const sharedWithMeFolder = _createMockFolder({ + id: '1sharedwithme123', + name: 'Collaboration Folder', + sharedWithMeTime: '2024-11-20T20:32:17.302Z', + shared: true, + }) + + // Act + tree.upsertNode(sharedWithMeFolder) + const root = tree.getRootNode() + + // Assert + expect(root).toMatchObject({ + children: [ + expect.objectContaining({ + id: SHARED_WITH_ME_ID, + name: 'Shared with me', + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [sharedWithMeFolder], + }), + ], + }) + }) + + it("should correctly add a folder that's a nested child of 'Shared with me'", () => { + // Arrange + const tree = _createMockTree() + const sharedWithMeParentFolder = _createMockFolder({ + id: '1sharedparent123', + name: 'Main Project', + sharedWithMeTime: '2024-11-20T20:32:17.302Z', + shared: true, + }) + const nestedSharedWithMeFolder = _createMockFolder({ + parents: [sharedWithMeParentFolder.id], + id: '1sharednested123', + name: 'Subfolder', + shared: true, + }) + + // Act + tree.upsertNode(sharedWithMeParentFolder) + tree.upsertNode(nestedSharedWithMeFolder) + const root = tree.getRootNode() + + // Assert + expect(root).toMatchObject({ + children: [ + expect.objectContaining({ + id: SHARED_WITH_ME_ID, + name: 'Shared with me', + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [ + expect.objectContaining({ + ...sharedWithMeParentFolder, + children: [expect.objectContaining(nestedSharedWithMeFolder)], + }), + ], + }), + ], + }) + }) + + describe.concurrent('automatic rebalancing', () => { + it("should correctly rebalance to 'Shared with me' when previously under 'My Drive'", () => { + // Arrange + const tree = _createMockTree() + const sharedWithMeParentFolder = _createMockFolder({ + id: '1rebalanceparent123', + name: 'Rebalance Test', + sharedWithMeTime: '2024-11-20T20:32:17.302Z', + shared: true, + }) + const nestedSharedWithMeFolder = _createMockFolder({ + parents: [sharedWithMeParentFolder.id], + id: '1rebalancenested123', + name: 'Nested Test', + 
shared: true, + }) + + // Act + tree.upsertNode(nestedSharedWithMeFolder) + tree.upsertNode(sharedWithMeParentFolder) + const root = tree.getRootNode() + + // Assert + expect(root).toMatchObject({ + children: [ + expect.objectContaining({ + id: SHARED_WITH_ME_ID, + name: 'Shared with me', + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [ + expect.objectContaining({ + ...sharedWithMeParentFolder, + children: [expect.objectContaining(nestedSharedWithMeFolder)], + }), + ], + }), + ], + }) + }) + + it("should correctly rebalance from a stub subfolder under 'Shared drives' to a known folder", () => { + // Arrange + const tree = _createMockTree() + const parentId = '1stubparent123' + const sharedDriveSubItem = _createMockFile({ + parents: [parentId], + id: '1stubitem123', + name: 'image.jpg', + mimeType: 'image/jpeg', + driveId: DUMMY_SHARED_DRIVE_ID, + md5Checksum: 'abc123def456', + sha256Checksum: 'def456abc123', + size: '2995372', + }) + const sharedDriveParentFolder = _createMockFolder({ + parents: [DUMMY_SHARED_DRIVE_ID], + id: parentId, + name: 'Media Files', + driveId: DUMMY_SHARED_DRIVE_ID, + }) + + // Act + tree.upsertNode(sharedDriveSubItem) + tree.upsertNode(sharedDriveParentFolder) + const root = tree.getRootNode() + + // Assert + expect(root).toMatchObject({ + children: [ + expect.objectContaining({ + id: SHARED_DRIVES_ID, + name: 'Shared drives', + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [ + expect.objectContaining({ + id: DUMMY_SHARED_DRIVE_ID, + name: `[${DUMMY_SHARED_DRIVE_ID}]`, + mimeType: APP_GOOGLE_FOLDER_MIMETYPE, + children: [ + expect.objectContaining({ + ...sharedDriveParentFolder, + children: [expect.objectContaining(sharedDriveSubItem)], + }), + ], + }), + ], + }), + ], + }) + }) + }) + + describe.concurrent('empty folder pruning', () => { + it('should remove nested empty folders when calling removeAllEmptyFoldersRecursively', () => { + // Arrange + const tree = _createMockTree() + const parentEmptyFolder = _createMockFolder({ + 
parents: [DUMMY_ROOT_FOLDER_ID], + id: '1parentempty123', + name: 'Parent Empty Folder', + }) + const nestedEmptyFolder = _createMockFolder({ + parents: [parentEmptyFolder.id], + id: '1nestedempty123', + name: 'Nested Empty Folder', + }) + const nonEmptyFolder = _createMockFolder({ + parents: [DUMMY_ROOT_FOLDER_ID], + id: '1nonempty123', + name: 'Non-Empty Folder', + }) + const fileInNonEmptyFolder = _createMockFile({ + parents: [nonEmptyFolder.id], + id: '1file123', + name: 'file.txt', + }) + + // Act + tree.upsertNode(parentEmptyFolder) + tree.upsertNode(nestedEmptyFolder) + tree.upsertNode(nonEmptyFolder) + tree.upsertNode(fileInNonEmptyFolder) + + // Pre-Assert + expect(tree.getRootNode().children).toHaveLength(2) + + tree.removeAllEmptyFoldersRecursively() + const root = tree.getRootNode() + + // Assert + expect(root.children).toHaveLength(1) + expect(root).toMatchObject({ + children: [expect.objectContaining(nonEmptyFolder)], + }) + }) + + it('should not remove the root folder when calling removeAllEmptyFoldersRecursively', () => { + // Arrange + const tree = _createMockTree() + + // Act + tree.removeAllEmptyFoldersRecursively() + const root = tree.getRootNode() + + // Assert + expect(root.id).toBe(DUMMY_ROOT_FOLDER_ID) + }) + }) +}) diff --git a/integrations/googledrivekb/src/files-readonly/google-drive-file-tree.ts b/integrations/googledrivekb/src/files-readonly/google-drive-file-tree.ts new file mode 100644 index 00000000000..e7f10febcb7 --- /dev/null +++ b/integrations/googledrivekb/src/files-readonly/google-drive-file-tree.ts @@ -0,0 +1,303 @@ +import { APP_GOOGLE_FOLDER_MIMETYPE } from '../mime-types' + +export type GoogleDriveNode = { + id: string + name: string + mimeType: string + parents?: string[] + children?: GoogleDriveNode[] + trashed?: boolean + version?: string + modifiedTime?: string + shared?: boolean + driveId?: string + size?: string + md5Checksum?: string + sha256Checksum?: string + sharedWithMeTime?: string +} + +export const 
export const SHARED_WITH_ME_ID = 'sharedWithMe'
export const SHARED_DRIVES_ID = 'sharedDrives'

/**
 * In-memory tree of Google Drive nodes rooted at a "My Drive" folder.
 *
 * Nodes are kept in two flat maps (node by ID, child IDs by parent ID); the
 * nested `children` representation is only materialised by `getRootNode`.
 * Nodes may be inserted in any order: when a node references a parent that is
 * not known yet, a placeholder folder named `[<id>]` is created and later
 * replaced (children preserved) once the real parent is upserted.
 *
 * Placement rules, as implemented by `_determineEffectiveParentId`:
 * - a node with `sharedWithMeTime` goes under its first parent if it has one,
 *   otherwise directly under the synthetic "Shared with me" folder;
 * - a node with `driveId` goes under its first parent if it has one, otherwise
 *   under the synthetic shared-drive root `[<driveId>]` inside "Shared drives";
 * - any other node goes under its first parent, or the root folder.
 */
export class GoogleDriveNodeTree {
  private readonly _nodeByIdMap = new Map<string, GoogleDriveNode>()
  private readonly _childIdsByParentIdMap = new Map<string, Set<string>>()
  private readonly _rootFolderId: string

  public constructor({ rootFolderId }: { rootFolderId: string }) {
    this._rootFolderId = rootFolderId
    this._createMyDriveRootNode()
  }

  /** Returns a deep copy of the root node with all descendants nested under `children`, sorted by name. */
  public getRootNode(): GoogleDriveNode {
    return this._buildNodeWithAllDescendants(this._rootFolderId)
  }

  /** Returns the node stored under `nodeId` as it was last upserted, or `undefined` when unknown. */
  public getNodeById(nodeId: string): GoogleDriveNode | undefined {
    return this._nodeByIdMap.get(nodeId)
  }

  /**
   * Inserts or replaces a node, then recursively upserts its `children`.
   *
   * A node carrying the root folder ID is never stored (the root created by the
   * constructor is kept); only its children are processed.
   *
   * @returns this tree, to allow chaining
   */
  public upsertNode(nodeToInsert: GoogleDriveNode): this {
    if (nodeToInsert.id !== this._rootFolderId) {
      const existingNodeWithSameId = this._nodeByIdMap.get(nodeToInsert.id)
      // Snapshot taken before re-parenting so that a placeholder's children survive its replacement.
      const existingChildIdsForThisNode = new Set<string>(this._childIdsByParentIdMap.get(nodeToInsert.id))

      this._removeNodeFromItsPreviousParent(existingNodeWithSameId)
      this._storeNodeInIdMap(nodeToInsert)
      this._addNodeToItsNewParent(nodeToInsert)
      this._preserveExistingChildrenForFolders(nodeToInsert, existingChildIdsForThisNode)
    }

    for (const child of nodeToInsert.children ?? []) {
      this.upsertNode(child)
    }
    return this
  }

  /**
   * Removes every folder that has no children, repeating until no folder is
   * left empty (so chains of nested empty folders disappear entirely).
   * The root folder is never removed.
   *
   * @returns this tree, to allow chaining
   */
  public removeAllEmptyFoldersRecursively(): this {
    const emptyFolderIds = this._findAllEmptyFolderIds()
    this._deleteEmptyFoldersFromMaps(emptyFolderIds)
    return this
  }

  /** Serialises the nested representation returned by `getRootNode`. */
  public toJSON(): string {
    return JSON.stringify(this.getRootNode())
  }

  /**
   * Rebuilds a tree from the output of `toJSON`.
   *
   * @throws Error when the payload is not a JSON object with a string `id`
   */
  public static fromJSON(json: string): GoogleDriveNodeTree {
    const parsed: unknown = JSON.parse(json)
    if (typeof parsed !== 'object' || parsed === null || typeof (parsed as { id?: unknown }).id !== 'string') {
      throw new Error('Invalid serialized Google Drive tree: the root node must be an object with a string "id"')
    }

    const rootNode = parsed as GoogleDriveNode
    return new GoogleDriveNodeTree({ rootFolderId: rootNode.id }).upsertNode(rootNode)
  }

  private _createMyDriveRootNode(): void {
    this._nodeByIdMap.set(this._rootFolderId, {
      id: this._rootFolderId,
      name: 'My Drive',
      mimeType: APP_GOOGLE_FOLDER_MIMETYPE,
    })
    this._childIdsByParentIdMap.set(this._rootFolderId, new Set<string>())
  }

  /** Returns the child-ID set of `parentNodeId`, creating an empty one when missing. */
  private _getOrCreateChildIdSet(parentNodeId: string): Set<string> {
    let childIds = this._childIdsByParentIdMap.get(parentNodeId)
    if (!childIds) {
      childIds = new Set<string>()
      this._childIdsByParentIdMap.set(parentNodeId, childIds)
    }
    return childIds
  }

  private _removeNodeFromItsPreviousParent(existingNodeWithSameId: GoogleDriveNode | undefined): void {
    if (!existingNodeWithSameId) return
    const previousParentId = this._determineEffectiveParentId(existingNodeWithSameId)
    this._childIdsByParentIdMap.get(previousParentId)?.delete(existingNodeWithSameId.id)
  }

  private _storeNodeInIdMap(nodeToStore: GoogleDriveNode): void {
    this._nodeByIdMap.set(nodeToStore.id, { ...nodeToStore })
  }

  private _addNodeToItsNewParent(nodeToAdd: GoogleDriveNode): void {
    const newParentId = this._determineEffectiveParentId(nodeToAdd)
    this._ensureParentNodeExists(newParentId, nodeToAdd)
    this._getOrCreateChildIdSet(newParentId).add(nodeToAdd.id)
  }

  private _preserveExistingChildrenForFolders(nodeToCheck: GoogleDriveNode, existingChildIds: Set<string>): void {
    if (nodeToCheck.mimeType !== APP_GOOGLE_FOLDER_MIMETYPE) return

    this._getOrCreateChildIdSet(nodeToCheck.id)
    if (existingChildIds.size > 0) {
      this._childIdsByParentIdMap.set(nodeToCheck.id, existingChildIds)
    }
  }

  /**
   * Collects the IDs of all folders that are, or become, empty. After each
   * pass the folders found so far are detached from their parents so that the
   * next pass can detect parents that have just become empty.
   * The returned set may contain the root folder ID; callers must skip it.
   */
  private _findAllEmptyFolderIds(): Set<string> {
    const emptyFolderIds = new Set<string>()
    while (this._findEmptyFoldersInCurrentIteration(emptyFolderIds)) {
      this._removeEmptyFoldersFromTheirParents(emptyFolderIds)
    }
    return emptyFolderIds
  }

  /** Adds newly discovered empty folders to `alreadyFoundEmptyFolderIds`; returns whether any was added. */
  private _findEmptyFoldersInCurrentIteration(alreadyFoundEmptyFolderIds: Set<string>): boolean {
    let foundNewEmptyFolder = false

    for (const [potentialParentId, childIdSet] of this._childIdsByParentIdMap.entries()) {
      const potentialParentNode = this._nodeByIdMap.get(potentialParentId)
      if (!potentialParentNode || alreadyFoundEmptyFolderIds.has(potentialParentId)) continue

      if (this._isFolderEmptyOfChildren(potentialParentNode, childIdSet)) {
        alreadyFoundEmptyFolderIds.add(potentialParentId)
        foundNewEmptyFolder = true
      }
    }

    return foundNewEmptyFolder
  }

  private _isFolderEmptyOfChildren(nodeToCheck: GoogleDriveNode, childIdSet: Set<string>): boolean {
    return nodeToCheck.mimeType === APP_GOOGLE_FOLDER_MIMETYPE && childIdSet.size === 0
  }

  private _removeEmptyFoldersFromTheirParents(emptyFolderIds: Set<string>): void {
    for (const emptyFolderId of emptyFolderIds) {
      if (emptyFolderId === this._rootFolderId) continue
      const emptyFolderNode = this._nodeByIdMap.get(emptyFolderId)
      if (!emptyFolderNode) continue

      const parentIdOfEmptyFolder = this._determineEffectiveParentId(emptyFolderNode)
      this._childIdsByParentIdMap.get(parentIdOfEmptyFolder)?.delete(emptyFolderId)
    }
  }

  /**
   * Drops the given folders from both maps. The folders were already detached
   * from their parents by `_findAllEmptyFolderIds`; parent resolution is
   * deliberately not repeated here because it creates system folders on demand
   * and could re-create one that this very loop has just deleted.
   */
  private _deleteEmptyFoldersFromMaps(emptyFolderIds: Set<string>): void {
    for (const emptyFolderId of emptyFolderIds) {
      if (emptyFolderId === this._rootFolderId) continue
      this._nodeByIdMap.delete(emptyFolderId)
      this._childIdsByParentIdMap.delete(emptyFolderId)
    }
  }

  private _buildNodeWithAllDescendants(nodeId: string): GoogleDriveNode {
    const requestedNode = this._nodeByIdMap.get(nodeId)
    if (!requestedNode) {
      throw new Error(`Node ${nodeId} not found`)
    }

    const allDescendantNodes = this._buildSortedDescendantNodes(nodeId)

    return {
      // The stored `children` snapshot is overwritten below, so it is blanked out
      // before cloning instead of being deep-copied for nothing.
      ...structuredClone({ ...requestedNode, children: undefined }),
      children: allDescendantNodes.length > 0 ? allDescendantNodes : undefined,
    }
  }

  private _buildSortedDescendantNodes(parentNodeId: string): GoogleDriveNode[] {
    const childIdSet = this._childIdsByParentIdMap.get(parentNodeId)
    if (!childIdSet || childIdSet.size === 0) return []

    return Array.from(childIdSet)
      .map((childId) => this._buildNodeWithAllDescendants(childId))
      .sort((nodeA, nodeB) => nodeA.name.localeCompare(nodeB.name))
  }

  /** Resolves the parent a node belongs under. NOTE: creates the synthetic system folders it refers to. */
  private _determineEffectiveParentId(nodeToAnalyze: GoogleDriveNode): string {
    if (nodeToAnalyze.sharedWithMeTime) {
      return this._determineParentIdForSharedWithMeNode(nodeToAnalyze)
    }

    if (nodeToAnalyze.driveId) {
      return this._determineParentIdForSharedDriveNode(nodeToAnalyze.driveId, nodeToAnalyze)
    }

    return nodeToAnalyze.parents?.[0] ?? this._rootFolderId
  }

  private _determineParentIdForSharedWithMeNode(sharedWithMeNode: GoogleDriveNode): string {
    this._ensureSpecialFolderNodeExists(SHARED_WITH_ME_ID, 'Shared with me')

    const firstParentId = sharedWithMeNode.parents?.[0]
    if (!firstParentId) {
      return SHARED_WITH_ME_ID
    }

    this._ensureParentExistsWithinSharedWithMeFolder(firstParentId)
    return firstParentId
  }

  private _determineParentIdForSharedDriveNode(driveId: string, sharedDriveNode: GoogleDriveNode): string {
    this._ensureSpecialFolderNodeExists(SHARED_DRIVES_ID, 'Shared drives')
    this._ensureSharedDriveRootNodeExists(driveId, driveId)

    return sharedDriveNode.parents?.[0] ?? driveId
  }

  private _ensureParentExistsWithinSharedWithMeFolder(parentIdWithinSharedWithMe: string): void {
    if (this._nodeByIdMap.has(parentIdWithinSharedWithMe)) return

    this._createPlaceholderFolderNode(parentIdWithinSharedWithMe, [SHARED_WITH_ME_ID])
    this._getOrCreateChildIdSet(SHARED_WITH_ME_ID).add(parentIdWithinSharedWithMe)
  }

  private _ensureParentNodeExists(parentIdToCheck: string, childNodeRequiringParent: GoogleDriveNode): void {
    if (this._nodeByIdMap.has(parentIdToCheck)) return
    if (this._isSpecialSystemFolder(parentIdToCheck)) return

    const placeholderParentId = this._determinePlaceholderParentIdForMissingNode(childNodeRequiringParent)
    this._createPlaceholderFolderNode(parentIdToCheck, [placeholderParentId])
    this._getOrCreateChildIdSet(placeholderParentId).add(parentIdToCheck)
  }

  private _isSpecialSystemFolder(nodeIdToCheck: string): boolean {
    return (
      nodeIdToCheck === SHARED_WITH_ME_ID || nodeIdToCheck === SHARED_DRIVES_ID || nodeIdToCheck === this._rootFolderId
    )
  }

  /** Picks where a placeholder for an unknown parent is attached, based on the child that needs it. */
  private _determinePlaceholderParentIdForMissingNode(childNodeNeedingParent: GoogleDriveNode): string {
    if (childNodeNeedingParent.sharedWithMeTime) {
      this._ensureSpecialFolderNodeExists(SHARED_WITH_ME_ID, 'Shared with me')
      return SHARED_WITH_ME_ID
    }

    if (childNodeNeedingParent.driveId) {
      this._ensureSpecialFolderNodeExists(SHARED_DRIVES_ID, 'Shared drives')
      this._ensureSharedDriveRootNodeExists(childNodeNeedingParent.driveId, childNodeNeedingParent.driveId)
      return childNodeNeedingParent.driveId
    }

    return this._rootFolderId
  }

  private _createPlaceholderFolderNode(placeholderNodeId: string, parentIds: string[]): void {
    this._nodeByIdMap.set(placeholderNodeId, {
      id: placeholderNodeId,
      name: `[${placeholderNodeId}]`,
      mimeType: APP_GOOGLE_FOLDER_MIMETYPE,
      parents: parentIds,
    })
    this._childIdsByParentIdMap.set(placeholderNodeId, new Set<string>())
  }

  /** Creates a synthetic top-level folder (directly under the root) unless a node with that ID exists. */
  private _ensureSpecialFolderNodeExists(specialFolderId: string, displayName: string): void {
    if (this._nodeByIdMap.has(specialFolderId)) return

    this._nodeByIdMap.set(specialFolderId, {
      id: specialFolderId,
      name: displayName,
      mimeType: APP_GOOGLE_FOLDER_MIMETYPE,
    })
    this._childIdsByParentIdMap.set(specialFolderId, new Set<string>())
    this._getOrCreateChildIdSet(this._rootFolderId).add(specialFolderId)
  }

  private _ensureSharedDriveRootNodeExists(sharedDriveRootNodeId: string, sharedDriveId: string): void {
    if (this._nodeByIdMap.has(sharedDriveRootNodeId)) return

    this._nodeByIdMap.set(sharedDriveRootNodeId, {
      id: sharedDriveRootNodeId,
      name: `[${sharedDriveId}]`,
      mimeType: APP_GOOGLE_FOLDER_MIMETYPE,
      parents: [SHARED_DRIVES_ID],
    })
    this._childIdsByParentIdMap.set(sharedDriveRootNodeId, new Set<string>())
    this._getOrCreateChildIdSet(SHARED_DRIVES_ID).add(sharedDriveRootNodeId)
  }
}
/**
 * Webhook entry point of the integration.
 *
 * Requests addressed to the OAuth wizard are delegated to the wizard. Any
 * other request is parsed as a Google Drive notification: unparsable requests
 * get a 400 response, notifications that `NotificationHandler` does not
 * support are ignored, and supported ones are processed, after which both
 * caches are saved.
 */
export const handler: bp.IntegrationProps['handler'] = async (props) => {
  const { req, client, ctx, logger } = props

  if (oauthWizard.isOAuthWizardUrl(req.path)) {
    return await _handleOAuthWizard(props)
  }

  const notifParseResult = notificationSchema.safeParse(req)
  if (!notifParseResult.success) {
    console.error('Invalid request:', notifParseResult.error)
    return {
      status: 400,
      body: 'Invalid request',
    }
  }

  const notification = notifParseResult.data
  if (!NotificationHandler.isSupported(notification)) {
    return
  }

  const driveClient = await Client.create({ client, ctx, logger })
  const filesCache = await FilesCache.load({ client, ctx })
  const fileChannelsCache = await FileChannelsCache.load({ client, ctx })
  const fileEventHandler = new FileEventHandler(client, driveClient, filesCache, fileChannelsCache)
  const notificationHandler = new NotificationHandler(driveClient, filesCache, fileEventHandler)
  await notificationHandler.handle(notification)
  await filesCache.save()
  await fileChannelsCache.save()
  return
}

/**
 * Runs the three-step OAuth wizard: a confirmation page linking to Google's
 * consent screen, the callback that exchanges the authorization code, and a
 * final success page.
 */
// NOTE(review): the return type argument was lost in extraction; `sdk.Response` is assumed — confirm.
const _handleOAuthWizard = async (props: bp.HandlerProps): Promise<sdk.Response> => {
  const { client, ctx } = props

  const wizard = new oauthWizard.OAuthWizardBuilder(props)

    .addStep({
      id: 'start',
      handler({ responses }) {
        return responses.displayButtons({
          pageTitle: 'Google Drive Knowledge Base Integration',
          htmlOrMarkdownPageContents: `
            This wizard will reset your Google Drive Knowledge Base integration. This means
            that the integration will cease to function until you complete the
            authorization process.

            Do you wish to continue?
          `,
          buttons: [
            {
              action: 'external',
              label: 'Yes, continue',
              navigateToUrl: _getOAuthAuthorizationUri(ctx),
              buttonType: 'primary',
            },
            { action: 'close', label: 'No, cancel', buttonType: 'secondary' },
          ],
        })
      },
    })

    .addStep({
      id: 'oauth-callback',
      async handler({ query, responses }) {
        const authorizationCode = query.get('code')

        if (!authorizationCode) {
          console.error('Error extracting code from url in OAuth handler')
          return responses.endWizard({
            success: false,
            errorMessage: 'Error extracting code from url in OAuth handler',
          })
        }

        await updateRefreshTokenFromAuthorizationCode({ authorizationCode, client, ctx })

        // Done in order to correctly display the authorization status in the UI (not used for webhooks)
        await client.configureIntegration({
          identifier: ctx.webhookId,
        })

        return responses.redirectToStep('end')
      },
    })

    .addStep({
      id: 'end',
      handler({ responses }) {
        return responses.endWizard({
          success: true,
        })
      },
    })

    .build()

  return await wizard.handleRequest()
}

/**
 * Builds the Google consent-screen URL for this integration instance.
 *
 * Every query value goes through `URLSearchParams`, so reserved characters in
 * the redirect URI, the state or the client ID cannot break out of their
 * parameter (the previous `encodeURI` call left `?`, `&`, `=` and `#` as is).
 */
const _getOAuthAuthorizationUri = (ctx: { webhookId: string }): string => {
  const queryParams = new URLSearchParams({
    scope: 'https://www.googleapis.com/auth/drive.readonly',
    access_type: 'offline',
    include_granted_scopes: 'true',
    response_type: 'code',
    prompt: 'consent',
    state: ctx.webhookId,
    redirect_uri: _getOAuthRedirectUri().href,
    client_id: bp.secrets.CLIENT_ID,
  })

  return `https://accounts.google.com/o/oauth2/v2/auth?${queryParams.toString()}`
}

/** URL of the wizard step that Google redirects to after consent. */
const _getOAuthRedirectUri = () => oauthWizard.getWizardStepUrl('oauth-callback')
integration = new bp.Integration({ + register, + unregister, + actions, + channels: {}, + handler, +}) + +export default sentryHelpers.wrapIntegration(integration, { + dsn: bp.secrets.SENTRY_DSN, + environment: bp.secrets.SENTRY_ENVIRONMENT, + release: bp.secrets.SENTRY_RELEASE, +}) diff --git a/integrations/googledrivekb/src/mime-types.ts b/integrations/googledrivekb/src/mime-types.ts new file mode 100644 index 00000000000..b227da11c58 --- /dev/null +++ b/integrations/googledrivekb/src/mime-types.ts @@ -0,0 +1,15 @@ +export const APP_PDF_MIMETYPE = 'application/pdf' +export const TEXT_HTML_MIMETYPE = 'text/html' +export const TEXT_MARKDOWN_MIMETYPE = 'text/markdown' +export const TEXT_PLAIN_MIMETYPE = 'text/plain' + +export const APP_GOOGLE_DOCS_MIMETYPE = 'application/vnd.google-apps.document' +export const APP_GOOGLE_SHEETS_MIMETYPE = 'application/vnd.google-apps.spreadsheet' +export const APP_GOOGLE_SLIDES_MIMETYPE = 'application/vnd.google-apps.presentation' + +export const APP_GOOGLE_FOLDER_MIMETYPE = 'application/vnd.google-apps.folder' +export const APP_GOOGLE_SHORTCUT_MIMETYPE = 'application/vnd.google-apps.shortcut' + +// Order of types in the array should reflect the priority of type when automatically choosing type for export +// Prioritize types best suited for indexing +export const INDEXABLE_MIMETYPES = [APP_PDF_MIMETYPE, TEXT_HTML_MIMETYPE, TEXT_MARKDOWN_MIMETYPE, TEXT_PLAIN_MIMETYPE] diff --git a/integrations/googledrivekb/src/notification-handler.ts b/integrations/googledrivekb/src/notification-handler.ts new file mode 100644 index 00000000000..8ea8b4fcdef --- /dev/null +++ b/integrations/googledrivekb/src/notification-handler.ts @@ -0,0 +1,59 @@ +import { Client } from './client' +import { FileEventHandler } from './file-event-handler' +import { deserializeToken, Token } from './file-notification-token' +import { FilesCache } from './files-cache' +import { Notification } from './types' +import * as bp from '.botpress' + +export class 
/**
 * Translates Google Drive push notifications into file events.
 *
 * Only `update` notifications that report a `children` change and `remove`
 * notifications have an effect; everything else is ignored.
 */
export class NotificationHandler {
  public constructor(
    private readonly _driveClient: Client,
    private readonly _filesCache: FilesCache,
    private readonly _fileEventHandler: FileEventHandler
  ) {}

  /** Tells whether `handle` does anything for this notification's resource state. */
  public static isSupported(notification: Notification): boolean {
    const type = notification.headers['x-goog-resource-state']
    return type === 'update' || type === 'remove'
  }

  /**
   * Processes one notification. The channel token is deserialized with the
   * webhook secret first; when that fails the notification is logged and dropped.
   */
  public async handle(notification: Notification): Promise<void> {
    const { headers } = notification
    const token = deserializeToken(headers['x-goog-channel-token'], bp.secrets.WEBHOOK_SECRET)
    if (!token) {
      // The raw token is not logged: deserialization failed, so there is nothing meaningful to print.
      console.error('Invalid notification token: the channel token could not be deserialized')
      return
    }

    switch (headers['x-goog-resource-state']) {
      case 'update':
        if (headers['x-goog-changed'].includes('children')) {
          await this._handleUpdateChildrenNotif(token)
        }
        break
      case 'remove':
        await this._handleRemoveNotif(token)
        break
      default:
        break
    }
  }

  /** Emits a "file created" event for every current child of the folder that is missing from the cache. */
  private async _handleUpdateChildrenNotif(token: Token): Promise<void> {
    if (token.fileType !== 'folder') {
      return
    }

    const currentChildren = await this._driveClient.getChildren(token.fileId)
    for (const child of currentChildren) {
      if (!this._filesCache.find(child.id)) {
        await this._fileEventHandler.handleFileCreated(child)
      }
    }
  }

  /** Emits a "file deleted" event when the removed file is present in the cache. */
  private async _handleRemoveNotif(token: Token): Promise<void> {
    const baseFile = this._filesCache.find(token.fileId)
    if (!baseFile) {
      return
    }
    await this._fileEventHandler.handleFileDeleted(baseFile)
  }
}
+export const commonFileAttrSchema = z.object({ + id: fileIdSchema.title('File ID'), + name: z.string().min(1).title('Name').describe('The name of the file'), + parentId: z + .string() + .min(1) + .optional() + .title('Parent ID') + .describe("The ID of the file that is the parent of this file. If not set, 'My Drive' is the parent"), + mimeType: z.string().min(1).title('MIME Type').describe('The media type of the file'), +}) + +export const baseNormalFileSchema = commonFileAttrSchema.extend({ + size: z.number().nonnegative().title('Size').describe('The size in bytes of the file'), + contentHash: z + .string() + .optional() + .title('Content Hash') + .describe('The hash of the file content, or version/revision number'), + lastModifiedDate: z + .string() + .datetime() + .optional() + .title('Last Modified Date') + .describe('The last modified date of the file in RFC 3339 format'), +}) + +export const baseFolderFileSchema = commonFileAttrSchema.extend({ + mimeType: z.literal(APP_GOOGLE_FOLDER_MIMETYPE).title('MIME Type').describe('The media type of the folder'), +}) + +export const baseShortcutFileSchema = commonFileAttrSchema.extend({ + mimeType: z.literal(APP_GOOGLE_SHORTCUT_MIMETYPE).title('MIME Type').describe('The media type of the shortcut'), +}) + +const _fileTypesArray = ['normal', 'folder', 'shortcut'] as const +export const fileTypesEnumSchema = z.enum(_fileTypesArray) +const _fileTypes = fileTypesEnumSchema.Enum +export const fileTypesUnionSchema = z.union([ + z.literal(_fileTypes.normal), + z.literal(_fileTypes.folder), + z.literal(_fileTypes.shortcut), +]) + +/* Used to represent a generic file, closer to what is received by the API. +Type is added to enable discrimination and remove/add access to properties +depending on file type. 
*/ +export const baseDiscriminatedFileSchema = z.discriminatedUnion('type', [ + baseNormalFileSchema.extend({ type: z.literal(_fileTypes.normal).title('Type').describe('The type of the file') }), + baseFolderFileSchema.extend({ type: z.literal(_fileTypes.folder).title('Type').describe('The type of the file') }), + baseShortcutFileSchema.extend({ + type: z.literal(_fileTypes.shortcut).title('Type').describe('The type of the file'), + }), +]) + +export const baseChannelSchema = z.object({ + id: z.string().min(1).title('Channel ID').describe('The ID of the channel'), + resourceId: z + .string() + .min(1) + .title('Resource ID') + .describe('The ID of the watched resource (different from the file ID)'), +}) + +const notificationTypesSchema = z.union([ + z.literal('sync'), + z.literal('add'), + z.literal('remove'), + z.literal('update'), + z.literal('trash'), + z.literal('untrash'), + z.literal('change'), +]) +const updateDetailTypesSchema = z.union([ + z.literal('content'), + z.literal('properties'), + z.literal('parents'), + z.literal('children'), + z.literal('permissions'), +]) +export const notificationSchema = z.object({ + headers: z.object({ + 'x-goog-resource-state': notificationTypesSchema, + 'x-goog-changed': z + .string() + .optional() // May be present on 'update' notifications + .transform((details) => details?.split(',') ?? []) + .pipe(z.array(updateDetailTypesSchema)), + 'x-goog-channel-token': z.string(), + }), +}) + +// Entities +const computedFileAttrSchema = z.object({ + path: z + .array( + z + .string() + .min(1) + .describe("A component of the path of the file. 
It corresponds to the name of one of it's parents.") + ) + .title('Path') + .describe("An array of the path's components sorted by level (root to leaf)"), +}) +export const fileSchema = baseNormalFileSchema.merge(computedFileAttrSchema) +export const folderSchema = baseFolderFileSchema.merge(computedFileAttrSchema) +export const shortcutSchema = baseShortcutFileSchema.merge(computedFileAttrSchema) +export const genericFileSchema = z.discriminatedUnion('type', [ + fileSchema.extend({ type: z.literal(_fileTypes.normal) }), + folderSchema.extend({ type: z.literal(_fileTypes.folder) }), + shortcutSchema.extend({ type: z.literal(_fileTypes.shortcut) }), +]) +export const fileChannelSchema = baseChannelSchema.extend({ + fileId: fileIdSchema.title('File ID'), +}) + +// Action args/outputs +function createListOutputSchema(itemSchema: T) { + return z.object({ + items: z + .array(itemSchema) + .describe( + 'The list of items listed in Google Drive. Results may be paginated. If set, use nextToken to get additional results' + ), + meta: z + .object({ + nextToken: z + .string() + .optional() + .describe('The token to pass as input to the next call of the list action to list additional items'), + }) + .describe('Metadata about the list results, including pagination information'), + }) +} + +export const readFileArgSchema = z.object({ id: fileIdSchema.title('File ID') }) +export const listItemsInputSchema = z.object({ + nextToken: z.string().optional().title('Next Token').describe('The token to use to get the next page of results'), +}) +export const listItemsOutputSchema = createListOutputSchema(z.any()) +export const listFilesOutputSchema = createListOutputSchema(fileSchema) +export const listFoldersOutputSchema = createListOutputSchema(folderSchema) + +export const downloadFileDataArgSchema = z.object({ + id: z.string().min(1).title('File ID').describe('The ID of the Google Drive file whose content will be downloaded'), + index: z.boolean().title('Index 
File').describe('Indicates if the file is to be indexed or not'), +}) +export const downloadFileDataOutputSchema = z.object({ + bpFileId: z + .string() + .min(1) + .describe('The Botpress file ID corresponding to the file that was uploaded from Google Drive to the Files API'), + url: z + .string() + .min(1) + .describe('The URL to access the file content. Use this instead of constructing the URL from the file ID.'), +}) + +export const fileDeletedEventSchema = z.object({ + id: fileIdSchema.title('File ID'), +}) +export const folderDeletedEventSchema = z.object({ + id: fileIdSchema.title('Folder ID'), +}) diff --git a/integrations/googledrivekb/src/setup.ts b/integrations/googledrivekb/src/setup.ts new file mode 100644 index 00000000000..7701aee18b5 --- /dev/null +++ b/integrations/googledrivekb/src/setup.ts @@ -0,0 +1,2 @@ +export const register = async () => {} +export const unregister = async () => {} diff --git a/integrations/googledrivekb/src/types.ts b/integrations/googledrivekb/src/types.ts new file mode 100644 index 00000000000..d22dc2ec737 --- /dev/null +++ b/integrations/googledrivekb/src/types.ts @@ -0,0 +1,54 @@ +import { z } from '@botpress/sdk' +import { google, drive_v3 } from 'googleapis' +import { + baseDiscriminatedFileSchema, + baseNormalFileSchema, + baseFolderFileSchema, + baseShortcutFileSchema, + fileSchema, + folderSchema, + commonFileAttrSchema, + listFilesOutputSchema, + listFoldersOutputSchema, + downloadFileDataArgSchema, + downloadFileDataOutputSchema, + listItemsOutputSchema, + listItemsInputSchema, + fileChannelSchema, + notificationSchema, + fileTypesUnionSchema, + baseChannelSchema, + genericFileSchema, + shortcutSchema, +} from './schemas' + +type Overwrite = Omit & NewT + +export type GoogleDriveClient = drive_v3.Drive +export type UnvalidatedGoogleDriveFile = drive_v3.Schema$File +export type UnvalidatedGoogleDriveChannel = drive_v3.Schema$Channel + +export type CommonFileAttr = z.infer +export type BaseDiscriminatedFile = z.infer 
/** A paginated list function that takes only the pagination input. */
export type ListFunction<T> = (input: ListItemsInput) => Promise<ListItemsOutput<T>>
/** Pagination input extended with caller-defined arguments. */
export type ListItemsInputWithArgs<T> = ListItemsInput & {
  args?: T
}
/** A paginated list function yielding items of type `T` and accepting extra arguments of type `U`. */
export type ListFunctionWithArgs<T, U> = (input: ListItemsInputWithArgs<U>) => Promise<ListItemsOutput<T>>

/**
 * Reads a stream to completion and returns its content as a single buffer.
 *
 * String chunks are converted to buffers, so streams with an encoding set are
 * supported as well.
 *
 * @param stream the stream to drain
 * @param maxBufferSize maximum number of bytes accepted
 * @returns the concatenated content
 * @throws Error when more than `maxBufferSize` bytes are received (the stream is destroyed),
 *         or whatever error the stream itself emits
 */
export const streamToBuffer = async (stream: Readable, maxBufferSize: number): Promise<Buffer> => {
  const chunks: Buffer[] = []
  let size = 0

  for await (const chunk of stream) {
    const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)
    size += buffer.length
    if (size > maxBufferSize) {
      stream.destroy()
      throw new Error(`Max buffer size exceeded while converting stream to buffer (${maxBufferSize})`)
    }
    chunks.push(buffer)
  }

  return Buffer.concat(chunks)
}

/**
 * Walks every page returned by `listFn` and awaits `processFn` on each item,
 * one at a time and in order. Paging stops when a page has no `nextToken`.
 */
export const listItemsAndProcess = async <T, U>(
  listFn: ListFunctionWithArgs<T, U>,
  processFn: (item: T) => Promise<void>,
  args?: U
): Promise<void> => {
  let nextToken: string | undefined
  do {
    const page: ListItemsOutput<T> = await listFn({ nextToken, args })
    for (const item of page.items) {
      await processFn(item)
    }
    nextToken = page.meta.nextToken
  } while (nextToken)
}

/** Collects the items of every page returned by `listFn` into one array, in page order. */
export const listAllItems = async <T, U>(listFn: ListFunctionWithArgs<T, U>, args?: U): Promise<T[]> => {
  const items: T[] = []
  await listItemsAndProcess(
    listFn,
    async (item) => {
      items.push(item)
    },
    args
  )
  return items
}

/**
 * Validates a channel returned by the Drive API.
 *
 * @throws RuntimeError when the resource ID or the channel ID is missing
 */
export const parseChannel = (channel: UnvalidatedGoogleDriveChannel): BaseFileChannel => {
  const { id, resourceId } = channel
  if (!resourceId) {
    throw new RuntimeError('Resource ID is missing in Schema$Channel from the API response')
  }

  if (!id) {
    throw new RuntimeError('Channel ID is missing in Schema$Channel from the API response')
  }

  return { id, resourceId }
}

/** Maps a MIME type to the integration's file type; anything that is not a folder or shortcut is `normal`. */
export const getFileTypeFromMimeType = (mimeType: string): FileType => {
  if (mimeType === APP_GOOGLE_FOLDER_MIMETYPE) {
    return 'folder'
  }
  if (mimeType === APP_GOOGLE_SHORTCUT_MIMETYPE) {
    return 'shortcut'
  }
  return 'normal'
}
/** Validates a list of files returned by the Drive API; throws on the first invalid one. */
export const parseBaseGenerics = (files: UnvalidatedGoogleDriveFile[]): BaseDiscriminatedFile[] => {
  return files.map((file) => parseBaseGeneric(file))
}

/**
 * Validates a file returned by the Drive API and tags it with its file type
 * (`folder`, `shortcut` or `normal`), derived from its MIME type.
 *
 * @throws RuntimeError when a required attribute is missing or validation fails
 */
export const parseBaseGeneric = (unvalidatedFile: UnvalidatedGoogleDriveFile): BaseDiscriminatedFile => {
  const { mimeType } = parseCommonFileAttr(unvalidatedFile)
  const type = getFileTypeFromMimeType(mimeType)

  switch (type) {
    case 'folder':
      return { ...parseBaseFolder(unvalidatedFile), type }
    case 'shortcut':
      return { ...parseBaseShortcut(unvalidatedFile), type }
    default:
      return { ...parseBaseNormal(unvalidatedFile), type }
  }
}

/**
 * Validates a regular (non-folder, non-shortcut) file.
 *
 * A missing size is treated as 0. The content hash is the first available of
 * the SHA-256 checksum, the MD5 checksum and the version.
 *
 * @throws RuntimeError when the size is not a number or schema validation fails
 */
export const parseBaseNormal = (unvalidatedFile: UnvalidatedGoogleDriveFile): BaseNormalFile => {
  const commonFileAttr = parseCommonFileAttr(unvalidatedFile)
  const { size: sizeStr, sha256Checksum, md5Checksum, version, modifiedTime } = unvalidatedFile

  // Explicit radix: the API reports the size as a decimal string.
  const size = Number.parseInt(sizeStr ?? '0', 10)
  if (Number.isNaN(size)) {
    throw new RuntimeError(
      `Invalid size returned in Schema$File from the API response for file with name=${commonFileAttr.name} (size=${sizeStr})`
    )
  }

  const parseResult = baseNormalFileSchema.safeParse({
    ...commonFileAttr,
    size,
    contentHash: (sha256Checksum || md5Checksum || version) ?? undefined,
    lastModifiedDate: modifiedTime ?? undefined,
  })
  if (!parseResult.success) {
    throw new RuntimeError(
      `Error validating Schema$File received from the API response: ${parseResult.error.message}`
    )
  }
  return parseResult.data
}

/**
 * Extracts the attributes shared by every file type (ID, name, MIME type and
 * first parent ID). `parentId` stays undefined when the API returned no
 * `parents` field.
 *
 * @throws RuntimeError when the ID, name or MIME type is missing, or when `parents` is an empty array
 */
const parseCommonFileAttr = (unvalidatedFile: UnvalidatedGoogleDriveFile): CommonFileAttr => {
  const { id, name, mimeType, parents } = unvalidatedFile
  if (!id) {
    throw new RuntimeError('File ID is missing in Schema$File from the API response')
  }

  if (!name) {
    throw new RuntimeError(`Name is missing in Schema$File from the API response for file with ID=${id}`)
  }

  if (!mimeType) {
    throw new RuntimeError(`MIME type is missing in Schema$File from the API response for file with name=${name}`)
  }

  let parentId: string | undefined = undefined
  if (parents) {
    parentId = parents[0]
    if (!parentId) {
      throw new RuntimeError(`Empty parent ID array in Schema$File from the API response for file with name=${name}`)
    }
  }

  return {
    id,
    name,
    mimeType,
    parentId,
  }
}

/** Validates a folder. @throws RuntimeError when a required attribute is missing or schema validation fails */
const parseBaseFolder = (unvalidatedFile: UnvalidatedGoogleDriveFile): BaseFolderFile => {
  const parseResult = baseFolderFileSchema.safeParse(parseCommonFileAttr(unvalidatedFile))
  if (!parseResult.success) {
    throw new RuntimeError(
      `Error validating Schema$File received from the API response: ${parseResult.error.message}`
    )
  }
  return parseResult.data
}

/** Validates a shortcut. @throws RuntimeError when a required attribute is missing or schema validation fails */
const parseBaseShortcut = (unvalidatedFile: UnvalidatedGoogleDriveFile): BaseShortcutFile => {
  const parseResult = baseShortcutFileSchema.safeParse(parseCommonFileAttr(unvalidatedFile))
  if (!parseResult.success) {
    throw new RuntimeError(
      `Error validating Schema$File received from the API response: ${parseResult.error.message}`
    )
  }
  return parseResult.data
}
@@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "jsx": "react-jsx", + "jsxImportSource": "preact", + "types": ["preact"], + "paths": { "*": ["./*"] }, + "outDir": "dist" + }, + "include": [".botpress/**/*", "definitions/**/*", "src/**/*", "*.ts"] +} diff --git a/integrations/googledrivekb/vitest.config.ts b/integrations/googledrivekb/vitest.config.ts new file mode 100644 index 00000000000..15790f99dc3 --- /dev/null +++ b/integrations/googledrivekb/vitest.config.ts @@ -0,0 +1,2 @@ +import config from '../../vitest.config' +export default config diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7edaa12b03e..6f292cb3a87 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1213,6 +1213,49 @@ importers: specifier: ^10.26.6 version: 10.26.6 + integrations/googledrivekb: + dependencies: + '@botpress/client': + specifier: workspace:* + version: link:../../packages/client + '@botpress/common': + specifier: workspace:* + version: link:../../packages/common + '@botpress/sdk': + specifier: workspace:* + version: link:../../packages/sdk + '@botpress/sdk-addons': + specifier: workspace:* + version: link:../../packages/sdk-addons + axios: + specifier: ^1.7.7 + version: 1.13.6 + googleapis: + specifier: ^144.0.0 + version: 144.0.0 + jsonwebtoken: + specifier: ^9.0.2 + version: 9.0.2 + uuid: + specifier: ^9.0.0 + version: 9.0.1 + devDependencies: + '@botpress/cli': + specifier: workspace:* + version: link:../../packages/cli + '@sentry/cli': + specifier: ^2.39.1 + version: 2.39.1 + '@types/jsonwebtoken': + specifier: ^9.0.3 + version: 9.0.6 + '@types/uuid': + specifier: ^9.0.1 + version: 9.0.1 + preact: + specifier: ^10.26.6 + version: 10.26.6 + integrations/grafana: dependencies: '@botpress/sdk': @@ -21087,7 +21130,7 @@ snapshots: jwks-rsa@3.1.0: dependencies: '@types/express': 4.17.17 - '@types/jsonwebtoken': 9.0.3 + '@types/jsonwebtoken': 9.0.6 debug: 4.4.0 jose: 4.15.2 limiter: 1.1.5 @@ -24000,7 +24043,7 @@ snapshots: universal-github-app-jwt@1.1.1: 
dependencies: - '@types/jsonwebtoken': 9.0.3 + '@types/jsonwebtoken': 9.0.6 jsonwebtoken: 9.0.2 universal-user-agent@6.0.0: {} From c774fa544a7194082d5cadcad57d50e7a8aa2c42 Mon Sep 17 00:00:00 2001 From: Francois Levasseur Date: Tue, 26 May 2026 15:44:30 -0400 Subject: [PATCH 2/5] chore: tmp disable sharepoint deployement to fix CD --- .github/workflows/deploy-integrations-production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-integrations-production.yml b/.github/workflows/deploy-integrations-production.yml index a49a74805c4..f6b9fa031bb 100644 --- a/.github/workflows/deploy-integrations-production.yml +++ b/.github/workflows/deploy-integrations-production.yml @@ -31,7 +31,7 @@ jobs: uses: ./.github/actions/deploy-integrations with: environment: 'production' - extra_filter: "-F '!docusign' -F '!zendesk' -F '!chat' -F '!grafana'" + extra_filter: "-F '!docusign' -F '!zendesk' -F '!chat' -F '!grafana' -F '!sharepoint'" force: ${{ github.event.inputs.force == 'true' }} sentry_auth_token: ${{ secrets.SENTRY_AUTH_TOKEN }} token_cloud_ops_account: ${{ secrets.PRODUCTION_TOKEN_CLOUD_OPS_ACCOUNT }} From c6054c27750d639c22807824f9d16a17f9048a9f Mon Sep 17 00:00:00 2001 From: Francois Levasseur Date: Tue, 26 May 2026 16:11:48 -0400 Subject: [PATCH 3/5] chore: re-enable sharepoint deployement --- .github/workflows/deploy-integrations-production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-integrations-production.yml b/.github/workflows/deploy-integrations-production.yml index f6b9fa031bb..a49a74805c4 100644 --- a/.github/workflows/deploy-integrations-production.yml +++ b/.github/workflows/deploy-integrations-production.yml @@ -31,7 +31,7 @@ jobs: uses: ./.github/actions/deploy-integrations with: environment: 'production' - extra_filter: "-F '!docusign' -F '!zendesk' -F '!chat' -F '!grafana' -F '!sharepoint'" + extra_filter: "-F '!docusign' -F '!zendesk' -F '!chat' -F 
'!grafana'" force: ${{ github.event.inputs.force == 'true' }} sentry_auth_token: ${{ secrets.SENTRY_AUTH_TOKEN }} token_cloud_ops_account: ${{ secrets.PRODUCTION_TOKEN_CLOUD_OPS_ACCOUNT }} From 95c6c66b575d354e2c9dd890841923ca64f26c82 Mon Sep 17 00:00:00 2001 From: Francois Levasseur Date: Tue, 26 May 2026 16:16:38 -0400 Subject: [PATCH 4/5] chore: tmp disable googledrivekb deployement to fix CD --- .github/workflows/deploy-integrations-production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-integrations-production.yml b/.github/workflows/deploy-integrations-production.yml index a49a74805c4..177ef546d45 100644 --- a/.github/workflows/deploy-integrations-production.yml +++ b/.github/workflows/deploy-integrations-production.yml @@ -31,7 +31,7 @@ jobs: uses: ./.github/actions/deploy-integrations with: environment: 'production' - extra_filter: "-F '!docusign' -F '!zendesk' -F '!chat' -F '!grafana'" + extra_filter: "-F '!docusign' -F '!zendesk' -F '!chat' -F '!grafana' -F '!googledrivekb'" force: ${{ github.event.inputs.force == 'true' }} sentry_auth_token: ${{ secrets.SENTRY_AUTH_TOKEN }} token_cloud_ops_account: ${{ secrets.PRODUCTION_TOKEN_CLOUD_OPS_ACCOUNT }} From b105b44f7f52c6039fe285a8c4e4f1a6e02bd557 Mon Sep 17 00:00:00 2001 From: Francois Levasseur Date: Tue, 26 May 2026 16:45:06 -0400 Subject: [PATCH 5/5] chore: re-enable googledrivekb deployement --- .github/workflows/deploy-integrations-production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-integrations-production.yml b/.github/workflows/deploy-integrations-production.yml index 177ef546d45..a49a74805c4 100644 --- a/.github/workflows/deploy-integrations-production.yml +++ b/.github/workflows/deploy-integrations-production.yml @@ -31,7 +31,7 @@ jobs: uses: ./.github/actions/deploy-integrations with: environment: 'production' - extra_filter: "-F '!docusign' -F '!zendesk' -F '!chat' -F '!grafana' -F 
'!googledrivekb'" + extra_filter: "-F '!docusign' -F '!zendesk' -F '!chat' -F '!grafana'" force: ${{ github.event.inputs.force == 'true' }} sentry_auth_token: ${{ secrets.SENTRY_AUTH_TOKEN }} token_cloud_ops_account: ${{ secrets.PRODUCTION_TOKEN_CLOUD_OPS_ACCOUNT }}