Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions apps/site/scripts/blog-data/__test__/generate.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mock.module('node:fs', {
readable.push(`---\n`);
file.frontMatterContent.forEach(line => readable.push(`${line}\n`));
readable.push(`---\n`);
file.content?.forEach(line => readable.push(`${line}\n`));
readable.push(null);
readable.close = () => {};
return readable;
Expand Down Expand Up @@ -61,6 +62,125 @@ describe('generateBlogData', () => {
assert.equal(post.author, 'author');
});

it('should generate a description from the first content paragraph', async () => {
  // Front matter for the single fixture post.
  const frontMatterContent = [
    `date: '2020-01-01T00:00:00.000Z'`,
    `title: POST 1`,
    `author: author`,
  ];

  // Body: a heading, one paragraph wrapped over two lines that carries a
  // link, bold text and a code span, then a second paragraph.
  const content = [
    '',
    '## Summary',
    '',
    'Read the [Node.js release notes](https://nodejs.org/) for',
    '**runtime** updates and `security` fixes.',
    '',
    'This is the second paragraph.',
  ];

  files = [{ path: 'pages/en/blog/post1.md', frontMatterContent, content }];

  const { posts } = await generateBlogData();
  const generatedDescription = posts[0].description;

  // Only the first paragraph is expected, joined onto one line as plain text.
  assert.equal(
    generatedDescription,
    'Read the Node.js release notes for runtime updates and security fixes.'
  );
});

it('should use only the first list item for description previews', async () => {
  // Front matter for the single fixture post.
  const frontMatterContent = [
    `date: '2020-01-01T00:00:00.000Z'`,
    `title: POST 1`,
    `author: author`,
  ];

  // Body: a heading followed by a two-item changelog list. The first item
  // starts with an escaped, linked commit hash.
  const content = [
    '',
    '### Notable Changes',
    '',
    '- \\[[`abc123def4`](https://github.com/nodejs/node/commit/abc123def4)] - **crypto**: update `randomUUID()` [#12345](https://github.com/nodejs/node/pull/12345)',
    '- **fs**: this should not be included',
    '',
  ];

  files = [{ path: 'pages/en/blog/post1.md', frontMatterContent, content }];

  const { posts } = await generateBlogData();
  const generatedDescription = posts[0].description;

  // The commit-hash prefix and all markup are gone; the second item is absent.
  assert.equal(generatedDescription, 'crypto: update randomUUID() #12345');
});

it('should skip category-only list items for description previews', async () => {
  // Front matter for the single fixture post.
  const frontMatterContent = [
    `date: '2020-01-01T00:00:00.000Z'`,
    `title: POST 1`,
    `author: author`,
  ];

  // Body: a heading followed by a nested list whose top-level items are bare
  // category labels ("**console**:" and "**fs**").
  const content = [
    '',
    '### Notable Changes',
    '',
    '- **console**:',
    ' - \\[[`abc123def4`](https://github.com/nodejs/node/commit/abc123def4)] - **console**: add color mode [#12345](https://github.com/nodejs/node/pull/12345)',
    '- **fs**',
    ' - **fs**: this should not be included',
    '',
  ];

  files = [{ path: 'pages/en/blog/post1.md', frontMatterContent, content }];

  const { posts } = await generateBlogData();
  const generatedDescription = posts[0].description;

  // The preview comes from the first nested item, not from the label above it.
  assert.equal(generatedDescription, 'console: add color mode #12345');
});

it('should ignore markup blocks when generating description previews', async () => {
  // Front matter for the single fixture post.
  const frontMatterContent = [
    `date: '2020-01-01T00:00:00.000Z'`,
    `title: POST 1`,
    `author: author`,
  ];

  // Body: a multi-line lowercase HTML block, a multi-line JSX block and a
  // self-closing JSX element, followed by one ordinary paragraph.
  const content = [
    '',
    '<div className="note">',
    'This lowercase HTML block should be ignored.',
    '</div>',
    '<AlertBox type="warning">',
    'This JSX block should be ignored too.',
    '</AlertBox>',
    '<BlogImage />',
    '',
    'This is the first real paragraph.',
  ];

  files = [{ path: 'pages/en/blog/post1.md', frontMatterContent, content }];

  const { posts } = await generateBlogData();
  const generatedDescription = posts[0].description;

  // Nothing from inside the markup blocks may leak into the preview.
  assert.equal(generatedDescription, 'This is the first real paragraph.');
});

it('should collect the data from a single md file if only one is found', async () => {
files = [
{
Expand Down
143 changes: 133 additions & 10 deletions apps/site/scripts/blog-data/generate.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,71 @@ import { getMarkdownFiles } from '#site/next.helpers.mjs';
// gets the current blog path based on local module path
const blogPath = join(process.cwd(), 'pages/en/blog');

/**
 * Escapes regular-expression metacharacters in a string so it can be
 * embedded in a `RegExp` pattern and matched literally.
 *
 * @param {string} value the text to escape
 * @returns {string} `value` with each metacharacter prefixed by a backslash
 */
const escapeRegExp = value => {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;

  // `$&` re-inserts the matched character after the added backslash
  return value.replace(metaCharacters, '\\$&');
};

/**
 * Detects whether a line starts with an opening HTML/JSX tag and reports
 * whether that element is already closed by the end of the same line.
 *
 * @param {string} line the line to inspect
 * @returns {{ tag: string, isClosed: boolean } | undefined} the tag name and
 * its closed state, or `undefined` when the line does not start with a tag
 */
const getMarkupBlockTag = line => {
  const opening = /^<([A-Za-z][\w.-]*)(?:\s|>|\/>|$)/.exec(line);

  if (opening === null) {
    return undefined;
  }

  const tag = opening[1];

  // Either the line ends with `/>` or with the matching `</tag>`
  const endsSelfClosed = /\/>\s*$/.test(line);
  const endsWithClosingTag = new RegExp(
    `</${escapeRegExp(tag)}>\\s*$`
  ).test(line);

  return { tag, isClosed: endsSelfClosed || endsWithClosingTag };
};

/**
 * Tells whether a line is structural Markdown rather than paragraph text:
 * a heading, image, code fence, `---` rule, closing tag, link reference
 * definition or single-line HTML comment.
 *
 * @param {string} line the line to classify
 * @returns {boolean} `true` when the line must not be used as paragraph text
 */
const isNonParagraphLine = line => {
  const structuralPrefixes = ['#', '![', '```', '~~~', '---', '</'];

  if (structuralPrefixes.some(prefix => line.startsWith(prefix))) {
    return true;
  }

  // Link reference definition, e.g. `[label]: https://example.org`
  if (/^\[[^\]]+\]:/.test(line)) {
    return true;
  }

  // HTML comment that opens and closes on this same line
  return /^<!--.*-->$/.test(line);
};

// Matches the leading marker of a Markdown list item: optional indentation,
// then `-`, `*` or an ordered-list number such as `1.`, followed by whitespace.
const listItemMarker = /^\s*([-*]|\d+\.)\s+/;

/**
 * Reduces a Markdown paragraph to a single-line, plain-text preview.
 *
 * The replacements run in order: images and links collapse to their label,
 * code-span and emphasis delimiters are removed, backslash escapes are
 * resolved, a leading `[<commit hash>] - ` prefix is dropped, HTML/JSX tags
 * and `&nbsp;` entities are removed, and whitespace is collapsed.
 *
 * @param {string} paragraph the raw Markdown text
 * @returns {string} the plain-text preview, possibly empty
 */
const stripMarkdownMarkup = paragraph =>
  paragraph
    // Images, inline links and reference links keep only their label
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
    .replace(/\[([^\]]+)\]\[[^\]]*\]/g, '$1')
    .replace(/`([^`]+)`/g, '$1')
    // An emphasis opener directly after a backslash is an escaped literal,
    // so it is left for the unescape step below.
    .replace(/(?<!\\)\*\*([^*]+)\*\*/g, '$1')
    // Underscore emphasis cannot start or end inside a word; this keeps
    // identifiers such as NODE_OPTIONS or child_process intact.
    .replace(/(?<![A-Za-z0-9\\])__([^_]+)__(?![A-Za-z0-9])/g, '$1')
    .replace(/(?<!\\)\*([^*]+)\*/g, '$1')
    .replace(/(?<![A-Za-z0-9\\])_([^_]+)_(?![A-Za-z0-9])/g, '$1')
    .replace(/\\([[\]_*`])/g, '$1')
    // Leading "[abc123def4] - " commit-hash prefix
    .replace(/^\[[a-f0-9]{7,12}\]\s+-\s+/i, '')
    .replace(/<\/?[^>]+>/g, '')
    .replace(/&nbsp;/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();

/**
 * Tells whether a list item is only a category label (for example
 * `**console**:` or `fs:`) and carries no description text of its own.
 *
 * @param {string} item the list item text with its list marker removed
 * @returns {boolean} `true` when the item is just a label
 */
const isCategoryOnlyListItem = item => {
  // Raw form: a single bold or code span, optionally followed by a colon
  if (/^(\*\*[^*]+\*\*|`[^`]+`):?$/.test(item)) {
    return true;
  }

  // Plain-text form: a short label that ends with a colon
  const plainText = stripMarkdownMarkup(item);

  return /^[\w ./-]+:$/.test(plainText);
};

/**
* This method parses the source (raw) Markdown content into Frontmatter
* and returns basic information for blog posts
*
* @param {string} filename the filename related to the blogpost
* @param {string} source the source markdown content of the blog post
* @param {string} paragraph the first paragraph of the blog post
*/
const getFrontMatter = (filename, source) => {
const getFrontMatter = (filename, source, paragraph) => {
const {
title = 'Untitled',
author = 'The Node.js Project',
Expand All @@ -42,6 +99,7 @@ const getFrontMatter = (filename, source) => {
author,
username,
date: new Date(date),
description: stripMarkdownMarkup(paragraph) || undefined,
categories,
slug,
};
Expand Down Expand Up @@ -76,28 +134,93 @@ const generateBlogData = async () => {

let rawFrontmatter = '';
let frontmatterSeparatorsEncountered = 0;
let ignoredMarkupTag;
const paragraphLines = [];

// We read line by line
_readLine.on('line', line => {
rawFrontmatter += `${line}\n`;

// We observe the frontmatter separators
if (line === '---') {
frontmatterSeparatorsEncountered++;
if (frontmatterSeparatorsEncountered < 2) {
rawFrontmatter += `${line}\n`;

if (line === '---') {
frontmatterSeparatorsEncountered++;
}

return;
}

const trimmedLine = line.trim();

if (ignoredMarkupTag) {
const closingTag = new RegExp(
`</${escapeRegExp(ignoredMarkupTag)}>\\s*$`
);

if (closingTag.test(trimmedLine)) {
ignoredMarkupTag = undefined;
}

return;
}

if (!trimmedLine) {
if (paragraphLines.length > 0) {
_readLine.close();
_stream.close();
}

return;
}

const markupBlockTag = getMarkupBlockTag(trimmedLine);

if (markupBlockTag) {
if (!markupBlockTag.isClosed) {
ignoredMarkupTag = markupBlockTag.tag;
}

return;
}

// Once we have two separators we close the readLine and the stream
if (frontmatterSeparatorsEncountered === 2) {
if (listItemMarker.test(line)) {
if (paragraphLines.length === 0) {
const listItem = line.replace(listItemMarker, '').trim();

if (isCategoryOnlyListItem(listItem)) {
return;
}

paragraphLines.push(listItem);
}

_readLine.close();
_stream.close();

return;
Comment thread
cursor[bot] marked this conversation as resolved.
}

if (isNonParagraphLine(trimmedLine)) {
if (paragraphLines.length > 0) {
_readLine.close();
_stream.close();
}

return;
}

paragraphLines.push(trimmedLine);
});

// Then we parse gray-matter on the frontmatter
// This allows us to only read the frontmatter part of each file
// and optimise the read-process as we have thousands of markdown files
// This allows us to read only the frontmatter and the first useful
// preview line instead of loading every blog post in full.
_readLine.on('close', () => {
const frontMatterData = getFrontMatter(filename, rawFrontmatter);
const frontMatterData = getFrontMatter(
filename,
rawFrontmatter,
paragraphLines.join(' ')
);

frontMatterData.categories.forEach(category => {
// we add the category to the categories set
Expand Down
1 change: 1 addition & 0 deletions apps/site/types/blog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export type BlogPost = {
author: string;
username: string;
date: string;
description?: string;
categories: Array<BlogCategory>;
slug: string;
};
Expand Down
2 changes: 2 additions & 0 deletions apps/site/util/__tests__/feeds.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ describe('generateWebsiteFeeds', () => {
slug: '/post-1',
title: 'Post 1',
date: '2025-04-18',
description: 'This is the first paragraph.',
categories: ['all'],
},
],
Expand All @@ -40,6 +41,7 @@ describe('generateWebsiteFeeds', () => {
guid: `${blogData.posts[0].slug}?${date.getTime()}`,
date,
link: `${base}${blogData.posts[0].slug}`,
description: blogData.posts[0].description,
},
]);
});
Expand Down
1 change: 1 addition & 0 deletions apps/site/util/feeds.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ export const generateWebsiteFeeds = ({ posts }: BlogPostsRSC) => {
title: post.title,
date,
link: `${canonicalUrl}${post.slug}`,
...(post.description && { description: post.description }),
guid:
time > guidTimestampStartDate
? `${post.slug}?${date.getTime()}`
Expand Down