0
0
mirror of https://github.com/go-gitea/gitea.git synced 2026-06-29 22:57:20 +02:00

fix(actions): don't swallow HTML entities into linkified URLs (#38239)

In the Actions log viewer, a double-quoted URL renders with a stray
extra `;` after it.

Reported in `gitea/runner#1046`

Remove the buggy AI slop `linkifyURLs` and use a new approach to process
URLs in text.

---------

Signed-off-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
bircni 2026-06-28 13:37:16 +02:00 committed by GitHub
parent 5b9251150c
commit ce8cf22af9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 104 additions and 117 deletions

View File

@ -1,5 +1,5 @@
import {createElementFromAttrs} from '../utils/dom.ts';
import {renderAnsi} from '../render/ansi.ts';
import {renderAnsiInto} from '../render/ansi.ts';
import {reactive} from 'vue';
import type {ActionsArtifact, ActionsJob, ActionsRun, ActionsStatus} from '../modules/gitea-actions.ts';
import type {IntervalId} from '../types.ts';
@ -80,10 +80,10 @@ export function createLogLineMessage(line: LogLine, cmd: LogLineCommand | null)
if (label) {
logMsg.append(createElementFromAttrs('span', {class: 'log-msg-label'}, `${label}:`));
const msgSpan = document.createElement('span');
msgSpan.innerHTML = ` ${renderAnsi(msgContent.trimStart())}`;
renderAnsiInto(msgSpan, ` ${msgContent.trimStart()}`);
logMsg.append(msgSpan);
} else {
logMsg.innerHTML = renderAnsi(msgContent);
renderAnsiInto(logMsg, msgContent);
}
return logMsg;
}

View File

@ -1,41 +1,4 @@
import {findUrlAtPosition, trimUrlPunctuation, urlRawRegex} from './utils.ts';
/** Collect every URL matched in `text`, with trailing punctuation trimmed from each match. */
function matchUrls(text: string): string[] {
  const found: string[] = [];
  for (const hit of text.matchAll(urlRawRegex)) {
    found.push(trimUrlPunctuation(hit[0]));
  }
  return found;
}
test('matchUrls', () => {
  // each entry is [input text, URLs expected to be extracted from it]
  const cases: Array<[string, string[]]> = [
    ['visit https://example.com for info', ['https://example.com']],
    ['see https://example.com.', ['https://example.com']],
    ['see https://example.com, and', ['https://example.com']],
    ['see https://example.com; and', ['https://example.com']],
    ['(https://example.com)', ['https://example.com']],
    ['"https://example.com"', ['https://example.com']],
    ['https://example.com/path?q=1&b=2#hash', ['https://example.com/path?q=1&b=2#hash']],
    ['https://example.com/path?q=1&b=2#hash.', ['https://example.com/path?q=1&b=2#hash']],
    ['https://x.co', ['https://x.co']],
    ['https://example.com/path_(wiki)', ['https://example.com/path_(wiki)']],
    ['https://en.wikipedia.org/wiki/Rust_(programming_language)', ['https://en.wikipedia.org/wiki/Rust_(programming_language)']],
    ['(https://en.wikipedia.org/wiki/Rust_(programming_language))', ['https://en.wikipedia.org/wiki/Rust_(programming_language)']],
    ['http://example.com', ['http://example.com']],
    ['no url here', []],
    ['https://a.com and https://b.com', ['https://a.com', 'https://b.com']],
    ['[![](https://img.shields.io/npm/v/pkg.svg?style=flat)](https://www.npmjs.org/package/pkg)', ['https://img.shields.io/npm/v/pkg.svg?style=flat', 'https://www.npmjs.org/package/pkg']],
  ];
  for (const [input, expected] of cases) {
    expect(matchUrls(input)).toEqual(expected);
  }
});
test('trimUrlPunctuation', () => {
  // each entry is [raw matched URL, URL expected after trimming]
  const cases: Array<[string, string]> = [
    ['https://example.com.', 'https://example.com'],
    ['https://example.com,', 'https://example.com'],
    ['https://example.com;', 'https://example.com'],
    ['https://example.com:', 'https://example.com'],
    ["https://example.com'", 'https://example.com'],
    ['https://example.com"', 'https://example.com'],
    ['https://example.com.,;', 'https://example.com'],
    ['https://example.com/path', 'https://example.com/path'],
    ['https://example.com/path_(wiki)', 'https://example.com/path_(wiki)'],
    ['https://example.com)', 'https://example.com'],
    ['https://en.wikipedia.org/wiki/Rust_(lang))', 'https://en.wikipedia.org/wiki/Rust_(lang)'],
  ];
  for (const [input, expected] of cases) {
    expect(trimUrlPunctuation(input)).toEqual(expected);
  }
});
import {findUrlAtPosition} from './utils.ts';
test('findUrlAtPosition', () => {
const doc = 'visit https://example.com for info';

View File

@ -1,5 +1,6 @@
import type {EditorView, ViewUpdate} from '@codemirror/view';
import type {CodemirrorModules} from './main.ts';
import {trimUrlPunctuation, urlRawRegex} from '../../utils/url.ts';
/** Remove trailing whitespace from all lines in the editor. */
export function trimTrailingWhitespaceFromView(view: EditorView): void {
@ -15,22 +16,9 @@ export function trimTrailingWhitespaceFromView(view: EditorView): void {
if (changes.length) view.dispatch({changes});
}
/**
 * Matches `http://` and `https://` URLs (case-insensitive). A match extends until the first
 * whitespace, angle bracket (`<`, `>`) or square bracket (`[`, `]`), so trailing punctuation such
 * as `.` or `)` is still part of the raw match and must be trimmed by the caller.
 * The regexp carries the `g` flag, so this shared instance keeps `lastIndex` state between
 * `exec()`/`test()` calls.
 */
export const urlRawRegex = /\bhttps?:\/\/[^\s<>[\]]+/gi;
/**
 * Strip trailing characters that are likely not part of the URL.
 *
 * Two kinds of trailing characters are removed, repeatedly and in any interleaving:
 * - the punctuation characters `. , ; : ' "`
 * - closing parens `)` that have no matching `(` in the URL (balanced ones are kept,
 *   e.g. Wikipedia links like `.../Rust_(lang)`)
 *
 * The result is always a prefix of the input, so callers can use its length as an offset.
 *
 * @param url - the raw matched URL text
 * @returns the URL without the trailing punctuation / unbalanced closing parens
 */
export function trimUrlPunctuation(url: string): string {
  const trailingPunctuation = '.,;:\'"';
  // Surplus of ')' over '(' in the whole string: at most that many trailing ')' may be dropped.
  let surplusClosing = 0;
  for (const ch of url) {
    if (ch === ')') surplusClosing++;
    else if (ch === '(') surplusClosing--;
  }
  let end = url.length;
  while (end > 0) {
    const last = url.charAt(end - 1);
    if (trailingPunctuation.includes(last)) {
      end--;
    } else if (last === ')' && surplusClosing > 0) {
      // unbalanced closing paren, e.g. from text like "(see https://example.com)"
      end--;
      surplusClosing--;
    } else {
      break;
    }
  }
  return url.slice(0, end);
}
/** Find the URL at the given character position in a document string, or null if none. */
export function findUrlAtPosition(doc: string, pos: number): string | null {
for (const match of doc.matchAll(urlRawRegex)) {
for (const match of doc.matchAll(urlRawRegex())) {
const url = trimUrlPunctuation(match[0]);
if (match.index !== undefined && pos >= match.index && pos < match.index + url.length) {
return url;
@ -67,7 +55,7 @@ export function goToDefinitionAt(cm: CodemirrorModules, view: EditorView, pos: n
export function clickableUrls(cm: CodemirrorModules) {
const urlMark = cm.view.Decoration.mark({class: 'cm-url'});
const urlDecorator = new cm.view.MatchDecorator({
regexp: urlRawRegex,
regexp: urlRawRegex(),
decorate: (add, from, _to, match) => {
const trimmed = trimUrlPunctuation(match[0]);
add(from, from + trimmed.length, urlMark);

View File

@ -1,6 +1,12 @@
import {renderAnsi} from './ansi.ts';
import {renderAnsiInto} from './ansi.ts';
test('renderAnsi', () => {
const renderAnsi = (line: string) => {
const el = document.createElement('div');
renderAnsiInto(el, line);
return el.innerHTML;
};
expect(renderAnsi('abc')).toEqual('abc');
expect(renderAnsi('abc\n')).toEqual('abc');
expect(renderAnsi('abc\r\n')).toEqual('abc');
@ -20,6 +26,9 @@ test('renderAnsi', () => {
// URLs in ANSI output become clickable links
const link = (url: string) => `<a href="${url}" target="_blank">${url}</a>`;
expect(renderAnsi('Downloading https://github.com/actions/upload-artifact/releases')).toEqual(`Downloading ${link('https://github.com/actions/upload-artifact/releases')}`);
expect(renderAnsi('\x1b[32mhttps://proxy.golang.org/cached-only\x1b[0m')).toEqual(`<span class="ansi-green-fg">${link('https://proxy.golang.org/cached-only')}</span>`);
expect(renderAnsi('foo https://example.com bar')).toEqual(`foo ${link('https://example.com')} bar`);
expect(renderAnsi('<https://example.com?a=b&c=d#h>')).toEqual(`&lt;${link('https://example.com?a=b&amp;c=d#h')}&gt;`);
expect(renderAnsi('open https://example.com.')).toEqual(`open ${link('https://example.com')}.`);
expect(renderAnsi('"https://example.com"')).toEqual(`"${link('https://example.com')}"`);
expect(renderAnsi('\x1b[32mhttps://example.com\x1b[0m')).toEqual(`<span class="ansi-green-fg">${link('https://example.com')}</span>`);
});

View File

@ -1,5 +1,5 @@
import {AnsiUp} from 'ansi_up';
import {linkifyURLs} from '../utils/url.ts';
import {trimUrlPunctuation, urlRawRegex} from '../utils/url.ts';
const replacements: Array<[RegExp, string]> = [
[/\x1b\[\d+[A-H]/g, ''], // Move cursor, treat them as no-op
@ -7,7 +7,7 @@ const replacements: Array<[RegExp, string]> = [
];
// render ANSI to HTML
export function renderAnsi(line: string): string {
export function renderAnsiInto(el: HTMLElement, line: string) {
// create a fresh ansi_up instance because otherwise previous renders can influence
// the output of future renders, because ansi_up is stateful and remembers things like
// unclosed opening tags for colors.
@ -44,5 +44,42 @@ export function renderAnsi(line: string): string {
result = lines.join('\n');
}
return linkifyURLs(result);
el.innerHTML = result;
// at the moment, only need to do post-process when there are potential URL links
if (result.includes('://')) renderAnsiPostProcessNode(el);
}
/**
 * Turn the first URL inside a text node into an `<a target="_blank">` element.
 *
 * The text node is replaced by up to three nodes: the text before the URL, the link,
 * and the text after the URL (trailing punctuation trimmed from the URL stays in that text).
 * Returns the created link, or the unchanged node when the text contains no URL, so the
 * caller can continue iterating from the returned node's next sibling.
 */
function renderAnsiProcessText(node: ChildNode): ChildNode {
  const content = node.textContent!;
  const found = urlRawRegex().exec(content);
  if (!found || found.index === undefined) return node;

  const href = trimUrlPunctuation(found[0]);
  const urlStart = found.index;
  const urlEnd = urlStart + href.length; // trimmed-off punctuation belongs to the trailing text

  const anchor = document.createElement('a');
  anchor.setAttribute('href', href);
  anchor.setAttribute('target', '_blank');
  anchor.textContent = href;

  const leading = content.slice(0, urlStart);
  const trailing = content.slice(urlEnd);
  const pieces: Array<Node | string> = [anchor];
  if (leading) pieces.unshift(leading);
  if (trailing) pieces.push(trailing);
  node.replaceWith(...pieces);
  return anchor;
}
/**
 * Walk the children of `el` recursively and linkify URLs found in text nodes.
 * Existing `<a>` elements are skipped entirely, so their content is never linkified again.
 */
function renderAnsiPostProcessNode(el: ChildNode) {
  let current = el.firstChild;
  while (current) {
    if (current.nodeName !== 'A') {
      if (current.nodeType === Node.TEXT_NODE) {
        // continue from the returned node: its next sibling is the not-yet-scanned remaining text
        current = renderAnsiProcessText(current);
      } else {
        renderAnsiPostProcessNode(current);
      }
    }
    current = current.nextSibling;
  }
}

View File

@ -1,4 +1,4 @@
import {linkifyURLs, pathEscape, pathEscapeSegments, urlQueryEscape} from './url.ts';
import {pathEscape, pathEscapeSegments, trimUrlPunctuation, urlQueryEscape, urlRawRegex} from './url.ts';
describe('escape', () => {
const queryNonAscii = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
@ -19,29 +19,36 @@ describe('escape', () => {
});
});
test('linkifyURLs', () => {
const link = (url: string) => `<a href="${url}" target="_blank">${url}</a>`;
expect(linkifyURLs('https://example.com')).toEqual(link('https://example.com'));
expect(linkifyURLs('https://dl.google.com/go/go1.23.6.linux-amd64.tar.gz')).toEqual(link('https://dl.google.com/go/go1.23.6.linux-amd64.tar.gz'));
expect(linkifyURLs('https://example.com/path?query=1&amp;b=2#frag')).toEqual(link('https://example.com/path?query=1&amp;b=2#frag'));
expect(linkifyURLs('visit https://example.com/repo for info')).toEqual(`visit ${link('https://example.com/repo')} for info`);
expect(linkifyURLs('See https://example.com.')).toEqual(`See ${link('https://example.com')}.`);
expect(linkifyURLs('https://example.com, and more')).toEqual(`${link('https://example.com')}, and more`);
expect(linkifyURLs('<span class="ansi-green-fg">https://proxy.golang.org/cached-only</span>')).toEqual(`<span class="ansi-green-fg">${link('https://proxy.golang.org/cached-only')}</span>`);
expect(linkifyURLs('<span style="color:rgb(0,255,0)">https://registry.npmjs.org/@types/node</span>')).toEqual(`<span style="color:rgb(0,255,0)">${link('https://registry.npmjs.org/@types/node')}</span>`);
expect(linkifyURLs('https://a.com and https://b.org')).toEqual(`${link('https://a.com')} and ${link('https://b.org')}`);
expect(linkifyURLs('no urls here')).toEqual('no urls here');
expect(linkifyURLs('http://example.com/path')).toEqual(link('http://example.com/path'));
expect(linkifyURLs('http://localhost:3000/repo')).toEqual(link('http://localhost:3000/repo'));
expect(linkifyURLs('https://')).toEqual('https://');
expect(linkifyURLs('<a href="https://example.com">Click here</a>')).toEqual('<a href="https://example.com">Click here</a>');
expect(linkifyURLs('<a\nhref="https://example.com">Click here</a>')).toEqual('<a\nhref="https://example.com">Click here</a>');
expect(linkifyURLs('<a href="https://example.com">https://example.com</a>')).toEqual('<a href="https://example.com">https://example.com</a>');
expect(linkifyURLs('https://evil.com/<script>alert(1)</script>')).toEqual(`${link('https://evil.com/')}<script>alert(1)</script>`);
expect(linkifyURLs('https://evil.com/"onmouseover="alert(1)')).toEqual(`${link('https://evil.com/')}"onmouseover="alert(1)`);
expect(linkifyURLs('javascript:alert(1)')).toEqual('javascript:alert(1)'); // eslint-disable-line no-script-url
expect(linkifyURLs("https://evil.com/'onclick='alert(1)")).toEqual(`${link('https://evil.com/')}'onclick='alert(1)`);
expect(linkifyURLs('data:text/html,<script>alert(1)</script>')).toEqual('data:text/html,<script>alert(1)</script>');
expect(linkifyURLs('https://evil.com/\nonclick=alert(1)')).toEqual(`${link('https://evil.com/')}\nonclick=alert(1)`);
expect(linkifyURLs('https://evil.com/&#34;onmouseover=alert(1)')).toEqual(`${link('https://evil.com/&#34;onmouseover=alert')}(1)`);
test('matchUrls', () => {
  // extract all URLs from the text the same way the consumers do: raw match, then trim punctuation
  const matchUrls = (text: string) => Array.from(text.matchAll(urlRawRegex()), (m) => trimUrlPunctuation(m[0]));
  // each entry is [input text, URLs expected to be extracted from it]
  const cases: Array<[string, string[]]> = [
    ['visit https://example.com for info', ['https://example.com']],
    ['see https://example.com.', ['https://example.com']],
    ['see https://example.com, and', ['https://example.com']],
    ['see https://example.com; and', ['https://example.com']],
    ['(https://example.com)', ['https://example.com']],
    ['"https://example.com"', ['https://example.com']],
    ['https://example.com/path?q=1&b=2#hash', ['https://example.com/path?q=1&b=2#hash']],
    ['https://example.com/path?q=1&b=2#hash.', ['https://example.com/path?q=1&b=2#hash']],
    ['https://x.co', ['https://x.co']],
    ['https://example.com/path_(wiki)', ['https://example.com/path_(wiki)']],
    ['https://en.wikipedia.org/wiki/Rust_(programming_language)', ['https://en.wikipedia.org/wiki/Rust_(programming_language)']],
    ['(https://en.wikipedia.org/wiki/Rust_(programming_language))', ['https://en.wikipedia.org/wiki/Rust_(programming_language)']],
    ['http://example.com', ['http://example.com']],
    ['no url here', []],
    ['https://a.com and https://b.com', ['https://a.com', 'https://b.com']],
    ['[![](https://img.shields.io/npm/v/pkg.svg?style=flat)](https://www.npmjs.org/package/pkg)', ['https://img.shields.io/npm/v/pkg.svg?style=flat', 'https://www.npmjs.org/package/pkg']],
  ];
  for (const [input, expected] of cases) {
    expect(matchUrls(input)).toEqual(expected);
  }
});
test('trimUrlPunctuation', () => {
  // each entry is [raw matched URL, URL expected after trimming]
  const cases: Array<[string, string]> = [
    ['https://example.com.', 'https://example.com'],
    ['https://example.com,', 'https://example.com'],
    ['https://example.com;', 'https://example.com'],
    ['https://example.com:', 'https://example.com'],
    ["https://example.com'", 'https://example.com'],
    ['https://example.com"', 'https://example.com'],
    ['https://example.com.,;', 'https://example.com'],
    ['https://example.com/path', 'https://example.com/path'],
    ['https://example.com/path_(wiki)', 'https://example.com/path_(wiki)'],
    ['https://example.com)', 'https://example.com'],
    ['https://en.wikipedia.org/wiki/Rust_(lang))', 'https://en.wikipedia.org/wiki/Rust_(lang)'],
  ];
  for (const [input, expected] of cases) {
    expect(trimUrlPunctuation(input)).toEqual(expected);
  }
});

View File

@ -1,4 +1,15 @@
import {html, htmlRaw} from './html.ts';
/**
 * Create a regexp matching `http://` and `https://` URLs (case-insensitive, global).
 * A match extends until the first whitespace, angle bracket (`<`, `>`) or square bracket
 * (`[`, `]`), so trailing punctuation is still part of the raw match.
 *
 * A new instance is returned on every call because a JS regexp with the `g` flag keeps
 * internal state, so instances must not be shared between users.
 */
export const urlRawRegex = (): RegExp => {
  return /\bhttps?:\/\/[^\s<>[\]]+/gi;
};
/**
 * Strip trailing characters that are likely not part of the URL.
 *
 * Two kinds of trailing characters are removed, repeatedly and in any interleaving:
 * - the punctuation characters `. , ; : ' "`
 * - closing parens `)` that have no matching `(` in the URL (balanced ones are kept,
 *   e.g. Wikipedia links like `.../Rust_(lang)`)
 *
 * The result is always a prefix of the input, so callers can use its length as an offset.
 *
 * @param url - the raw matched URL text
 * @returns the URL without the trailing punctuation / unbalanced closing parens
 */
export function trimUrlPunctuation(url: string): string {
  const trailingPunctuation = '.,;:\'"';
  // Surplus of ')' over '(' in the whole string: at most that many trailing ')' may be dropped.
  let surplusClosing = 0;
  for (const ch of url) {
    if (ch === ')') surplusClosing++;
    else if (ch === '(') surplusClosing--;
  }
  let end = url.length;
  while (end > 0) {
    const last = url.charAt(end - 1);
    if (trailingPunctuation.includes(last)) {
      end--;
    } else if (last === ')' && surplusClosing > 0) {
      // unbalanced closing paren, e.g. from text like "(see https://example.com)"
      end--;
      surplusClosing--;
    } else {
      break;
    }
  }
  return url.slice(0, end);
}
export function urlQueryEscape(s: string) {
// See "TestQueryEscape" in backend
@ -31,31 +42,3 @@ export function pathEscapeSegments(s: string): string {
// The same as backend's PathEscapeSegments
return s.split('/').map(pathEscape).join('/');
}
// Alternation of: an opening HTML tag (skipped), a closing HTML tag (skipped), or a bare http(s) URL (linkified)
const urlLinkifyPattern = /(<([-\w]+)[^>]*>)|(<\/([-\w]+)[^>]*>)|(https?:\/\/[^\s<>"'`|(){}[\]]+)/gi;
// Punctuation at the very end of a matched URL that is kept outside of the generated link
const trailingPunctPattern = /[.,;:!?]+$/;

/**
 * Wrap bare URLs of an HTML string in `<a target="_blank">` links.
 * Tags are copied through unchanged, and URLs between an `<a ...>` and its `</a>` are left alone.
 * Trailing punctuation of a matched URL is emitted after the link instead of inside it.
 */
export function linkifyURLs(htmlString: string): string {
  let insideAnchor = false;
  return htmlString.replace(urlLinkifyPattern, (whole, _openFull, openName, _closeFull, closeName, rawUrl) => {
    // track whether the scan position is inside an existing anchor element
    if (openName === 'a' || closeName === 'a') {
      insideAnchor = openName === 'a';
      return whole;
    }
    // other tags, and URLs that are already part of a link, stay as they are
    if (insideAnchor || !rawUrl) return whole;

    const punct = trailingPunctPattern.exec(rawUrl)?.[0] ?? '';
    const cleanUrl = punct ? rawUrl.slice(0, -punct.length) : rawUrl;
    // safe because regexp only matches valid URLs (no quotes or angle brackets)
    return html`<a href="${htmlRaw(cleanUrl)}" target="_blank">${htmlRaw(cleanUrl)}</a>${htmlRaw(punct)}`;
  });
}