netlify · pieh · May 15, 2025 · May 16, 2025 · May 16, 2025 · May 16, 2025
@@ -29,6 +29,7 @@ const coreStep: CoreStepFunction = async function ({
   netlifyConfig,
   explicitSecretKeys,
   enhancedSecretScan,
+  featureFlags,
   systemLog,
   deployId,
   api,
@@ -37,8 +38,9 @@ const coreStep: CoreStepFunction = async function ({
 
   const passedSecretKeys = (explicitSecretKeys || '').split(',')
   const envVars = netlifyConfig.build.environment as Record<string, unknown>
+  const useReadLine = !featureFlags?.secret_scanning_no_readline
 
-  systemLog?.({ passedSecretKeys, buildDir })
+  systemLog?.({ passedSecretKeys, buildDir, useReadLine })
 
   if (!isSecretsScanningEnabled(envVars)) {
     logSecretsScanSkipMessage(logs, 'Secrets scanning disabled via SECRETS_SCAN_ENABLED flag set to false.')
@@ -91,6 +93,7 @@ const coreStep: CoreStepFunction = async function ({
         keys: keysToSearchFor,
         base: buildDir as string,
         filePaths,
+        useReadLine,
       })
 
       secretMatches = scanResults.matches.filter((match) => explicitSecretKeysToScanFor.includes(match.key))
@@ -103,6 +106,7 @@ const coreStep: CoreStepFunction = async function ({
         enhancedSecretsScanMatchesCount: enhancedSecretMatches.length,
         secretsFilesCount: scanResults.scannedFilesCount,
         keysToSearchFor,
+        useReadLine,
       }
 
       systemLog?.(attributesForLogsAndSpan)

@@ -17,6 +17,7 @@ interface ScanArgs {
   keys: string[]
   base: string
   filePaths: string[]
+  useReadLine: boolean
 }
 
 interface MatchResult {
@@ -215,7 +216,13 @@ const omitPathMatches = (relativePath, omitPaths) => {
  * @param scanArgs {ScanArgs} scan options
  * @returns promise with all of the scan results, if any
  */
-export async function scanFilesForKeyValues({ env, keys, filePaths, base }: ScanArgs): Promise<ScanResults> {
+export async function scanFilesForKeyValues({
+  env,
+  keys,
+  filePaths,
+  base,
+  useReadLine,
+}: ScanArgs): Promise<ScanResults> {
   const scanResults: ScanResults = {
     matches: [],
     scannedFilesCount: 0,
@@ -245,6 +252,8 @@ export async function scanFilesForKeyValues({ env, keys, filePaths, base }: Scan
 
   let settledPromises: PromiseSettledResult<MatchResult[]>[] = []
 
+  const searchStream = useReadLine ? searchStreamReadline : searchStreamNoReadline
+
   // process the scanning in batches to not run into memory issues by
   // processing all files at the same time.
   while (filePaths.length > 0) {
@@ -269,7 +278,14 @@ export async function scanFilesForKeyValues({ env, keys, filePaths, base }: Scan
   return scanResults
 }
 
-const searchStream = (basePath: string, file: string, keyValues: Record<string, string[]>): Promise<MatchResult[]> => {
+/**
+ * Search stream implementation using node:readline
+ */
+const searchStreamReadline = (
+  basePath: string,
+  file: string,
+  keyValues: Record<string, string[]>,
+): Promise<MatchResult[]> => {
   return new Promise((resolve, reject) => {
     const filePath = path.resolve(basePath, file)
 
@@ -391,6 +407,143 @@ const searchStream = (basePath: string, file: string, keyValues: Record<string,
   })
 }
 
+/**
+ * Search stream implementation that scans raw read-stream chunks instead of going through
+ * node:readline. Only the last `maxValLength` characters are carried over between chunks, so
+ * content without any newline characters does not have to be buffered in full.
+ *
+ * At most one match is reported per value per line. Line numbers are 1-based and are derived by
+ * counting `\n` characters.
+ *
+ * @param basePath directory that `file` is resolved against
+ * @param file path of the file to scan; it is also the `file` reported on every match
+ * @param keyValues map of secret key name to the value permutations to look for
+ * @returns promise resolving with the matches found in the file. It resolves with an empty list
+ *   when there is no non-empty value to look for or when the path is a directory, and rejects on
+ *   any other read error.
+ */
+const searchStreamNoReadline = (
+  basePath: string,
+  file: string,
+  keyValues: Record<string, string[]>,
+): Promise<MatchResult[]> => {
+  return new Promise((resolve, reject) => {
+    const filePath = path.resolve(basePath, file)
+    const matches: MatchResult[] = []
+
+    // Empty values are dropped up front: they carry no secret, and `indexOf('')` never returns -1,
+    // which would make the scan loop below spin forever.
+    const keyVals: string[] = ([] as string[])
+      .concat(...Object.values(keyValues))
+      .filter((val) => val.length !== 0)
+
+    if (keyVals.length === 0) {
+      // no non-empty values to scan for - settle the promise right away instead of leaving it
+      // pending, and do not open the file at all
+      resolve(matches)
+      return
+    }
+
+    const valLengths = keyVals.map((val) => val.length)
+    const maxValLength = Math.max(...valLengths)
+    const minValLength = Math.min(...valLengths)
+
+    // value -> secret key name; when several keys share a value the last key wins
+    const keyByValue = new Map<string, string>()
+    for (const [secretKeyName, valuePermutations] of Object.entries(keyValues)) {
+      for (const val of valuePermutations) {
+        keyByValue.set(val, secretKeyName)
+      }
+    }
+
+    // Decode as UTF-8 inside the stream so that a multi-byte character split across two chunks is
+    // reassembled instead of being turned into replacement characters.
+    const inStream = createReadStream(filePath, { encoding: 'utf8' })
+
+    // Sliding window over the file content: the unscanned chunk plus the retained tail.
+    let buffer = ''
+    // Offset of buffer[0] within the decoded file content.
+    let fileIndex = 0
+    // Number of newline characters that were already trimmed off the front of the buffer.
+    let processedLines = 0
+    // Absolute positions already reported per value, so that a match sitting in the retained tail
+    // is not reported again when the next chunk arrives.
+    const foundIndexes = new Map<string, Set<number>>()
+    // Line numbers already reported per value, to emit a single match per value per line.
+    const foundLines = new Map<string, Set<number>>()
+
+    const getCurrentBufferNewLineIndexes = (): number[] => {
+      const newLinesIndexesInCurrentBuffer: number[] = []
+      let newLineIndex = -1
+      while ((newLineIndex = buffer.indexOf('\n', newLineIndex + 1)) !== -1) {
+        newLinesIndexesInCurrentBuffer.push(newLineIndex)
+      }
+
+      return newLinesIndexesInCurrentBuffer
+    }
+
+    inStream.on('data', function (chunk: string | Buffer) {
+      buffer += chunk.toString()
+
+      // computed lazily, at most once per chunk
+      let newLinesIndexesInCurrentBuffer: number[] | null = null
+
+      // `>=` so that content exactly as long as the shortest value is still scanned
+      if (buffer.length >= minValLength) {
+        for (const valVariant of keyVals) {
+          let valVariantIndex = -1
+          while ((valVariantIndex = buffer.indexOf(valVariant, valVariantIndex + 1)) !== -1) {
+            const pos = fileIndex + valVariantIndex
+            let foundIndexesForValVariant = foundIndexes.get(valVariant)
+            if (foundIndexesForValVariant?.has(pos)) {
+              // already handled while it was part of the previous window
+              continue
+            }
+
+            if (newLinesIndexesInCurrentBuffer === null) {
+              newLinesIndexesInCurrentBuffer = getCurrentBufferNewLineIndexes()
+            }
+
+            // lines trimmed so far + newlines in the current window that precede the match
+            let lineNumber = processedLines + 1
+            for (const newLineIndex of newLinesIndexesInCurrentBuffer) {
+              if (valVariantIndex > newLineIndex) {
+                lineNumber++
+              } else {
+                break
+              }
+            }
+
+            let foundLinesForValVariant = foundLines.get(valVariant)
+            if (!foundLinesForValVariant?.has(lineNumber)) {
+              matches.push({
+                file,
+                lineNumber,
+                key: keyByValue.get(valVariant) ?? '',
+              })
+
+              if (!foundLinesForValVariant) {
+                foundLinesForValVariant = new Set<number>()
+                foundLines.set(valVariant, foundLinesForValVariant)
+              }
+              foundLinesForValVariant.add(lineNumber)
+            }
+
+            if (!foundIndexesForValVariant) {
+              foundIndexesForValVariant = new Set<number>()
+              foundIndexes.set(valVariant, foundIndexesForValVariant)
+            }
+            foundIndexesForValVariant.add(pos)
+          }
+        }
+      }
+
+      if (buffer.length > maxValLength) {
+        const lengthDiff = buffer.length - maxValLength
+        fileIndex += lengthDiff
+        if (newLinesIndexesInCurrentBuffer === null) {
+          newLinesIndexesInCurrentBuffer = getCurrentBufferNewLineIndexes()
+        }
+
+        // advance processed lines by the newlines that are about to be trimmed off
+        for (const newLineIndex of newLinesIndexesInCurrentBuffer) {
+          if (newLineIndex < lengthDiff) {
+            processedLines++
+          } else {
+            break
+          }
+        }
+
+        // Keep the last part of the buffer to handle split values across chunks
+        buffer = buffer.slice(-maxValLength)
+      }
+    })
+
+    inStream.on('error', function (error: any) {
+      if (error?.code === 'EISDIR') {
+        // file path is a directory - do nothing
+        resolve(matches)
+      } else {
+        reject(error)
+      }
+    })
+
+    inStream.on('close', function () {
+      resolve(matches)
+    })
+  })
+}
+
 /**
  * ScanResults are all of the finds for all keys and their disparate locations. Scanning is
  * async in streams so order can change a lot. Some matches are the result of an env var explicitly being marked as secret,

@@ -0,0 +1,38 @@
+import { randomBytes } from "node:crypto";
+import { createWriteStream, mkdirSync } from "node:fs";
+
+mkdirSync('dist', { recursive: true }); // make sure the output directory exists; `recursive` keeps this from throwing when it already does
+
+const writer = createWriteStream('dist/out.txt', { flags: "w" }); // "w" creates the file or truncates an existing one
+
+async function writeLotOfBytesWithoutNewLines() {
+  const max_size = 128 * 1024 * 1024; // 128MB
+  const chunk_size = 1024 * 1024; // 1MB
+
+  let bytes_written = 0;
+  while (bytes_written < max_size) {
+    const bytes_to_write = Math.min(chunk_size, max_size - bytes_written);
+    const buffer = randomBytes(bytes_to_write).map((byte) =>
+      // swap LF and CR to something else 
+      byte === 0x0d || byte === 0x0a ? 0x0b : byte
+    );
+
+    writer.write(buffer);
+    bytes_written += bytes_to_write;
+  }
+}
+
+await writeLotOfBytesWithoutNewLines()
+writer.write(process.env.ENV_SECRET)
+await writeLotOfBytesWithoutNewLines()
+
+await new Promise((resolve, reject) => {
+  writer.close(err => {
+    if (err) {
+      reject(err);
+    } else {
+      resolve();
+    }
+  })
+})
+
@@ -0,0 +1,3 @@
+[build]
+command = 'node generate.mjs'
+publish = "./dist"