mirror of
https://github.com/go-gitea/gitea.git
synced 2026-06-29 22:57:20 +02:00
fix(actions): don't swallow HTML entities into linkified URLs (#38239)
In the Actions log viewer, a double-quoted URL renders with a stray extra `;` after it. Reported in `gitea/runner#1046`. Remove the buggy AI slop `linkifyURLs` and use a new approach to process URLs in text. --------- Signed-off-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
parent
5b9251150c
commit
ce8cf22af9
@ -1,5 +1,5 @@
|
||||
import {createElementFromAttrs} from '../utils/dom.ts';
|
||||
import {renderAnsi} from '../render/ansi.ts';
|
||||
import {renderAnsiInto} from '../render/ansi.ts';
|
||||
import {reactive} from 'vue';
|
||||
import type {ActionsArtifact, ActionsJob, ActionsRun, ActionsStatus} from '../modules/gitea-actions.ts';
|
||||
import type {IntervalId} from '../types.ts';
|
||||
@ -80,10 +80,10 @@ export function createLogLineMessage(line: LogLine, cmd: LogLineCommand | null)
|
||||
if (label) {
|
||||
logMsg.append(createElementFromAttrs('span', {class: 'log-msg-label'}, `${label}:`));
|
||||
const msgSpan = document.createElement('span');
|
||||
msgSpan.innerHTML = ` ${renderAnsi(msgContent.trimStart())}`;
|
||||
renderAnsiInto(msgSpan, ` ${msgContent.trimStart()}`);
|
||||
logMsg.append(msgSpan);
|
||||
} else {
|
||||
logMsg.innerHTML = renderAnsi(msgContent);
|
||||
renderAnsiInto(logMsg, msgContent);
|
||||
}
|
||||
return logMsg;
|
||||
}
|
||||
|
||||
@ -1,41 +1,4 @@
|
||||
import {findUrlAtPosition, trimUrlPunctuation, urlRawRegex} from './utils.ts';
|
||||
|
||||
function matchUrls(text: string): string[] {
|
||||
return Array.from(text.matchAll(urlRawRegex), (m) => trimUrlPunctuation(m[0]));
|
||||
}
|
||||
|
||||
test('matchUrls', () => {
|
||||
expect(matchUrls('visit https://example.com for info')).toEqual(['https://example.com']);
|
||||
expect(matchUrls('see https://example.com.')).toEqual(['https://example.com']);
|
||||
expect(matchUrls('see https://example.com, and')).toEqual(['https://example.com']);
|
||||
expect(matchUrls('see https://example.com; and')).toEqual(['https://example.com']);
|
||||
expect(matchUrls('(https://example.com)')).toEqual(['https://example.com']);
|
||||
expect(matchUrls('"https://example.com"')).toEqual(['https://example.com']);
|
||||
expect(matchUrls('https://example.com/path?q=1&b=2#hash')).toEqual(['https://example.com/path?q=1&b=2#hash']);
|
||||
expect(matchUrls('https://example.com/path?q=1&b=2#hash.')).toEqual(['https://example.com/path?q=1&b=2#hash']);
|
||||
expect(matchUrls('https://x.co')).toEqual(['https://x.co']);
|
||||
expect(matchUrls('https://example.com/path_(wiki)')).toEqual(['https://example.com/path_(wiki)']);
|
||||
expect(matchUrls('https://en.wikipedia.org/wiki/Rust_(programming_language)')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']);
|
||||
expect(matchUrls('(https://en.wikipedia.org/wiki/Rust_(programming_language))')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']);
|
||||
expect(matchUrls('http://example.com')).toEqual(['http://example.com']);
|
||||
expect(matchUrls('no url here')).toEqual([]);
|
||||
expect(matchUrls('https://a.com and https://b.com')).toEqual(['https://a.com', 'https://b.com']);
|
||||
expect(matchUrls('[](https://www.npmjs.org/package/pkg)')).toEqual(['https://img.shields.io/npm/v/pkg.svg?style=flat', 'https://www.npmjs.org/package/pkg']);
|
||||
});
|
||||
|
||||
test('trimUrlPunctuation', () => {
|
||||
expect(trimUrlPunctuation('https://example.com.')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com,')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com;')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com:')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation("https://example.com'")).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com"')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com.,;')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com/path')).toEqual('https://example.com/path');
|
||||
expect(trimUrlPunctuation('https://example.com/path_(wiki)')).toEqual('https://example.com/path_(wiki)');
|
||||
expect(trimUrlPunctuation('https://example.com)')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://en.wikipedia.org/wiki/Rust_(lang))')).toEqual('https://en.wikipedia.org/wiki/Rust_(lang)');
|
||||
});
|
||||
import {findUrlAtPosition} from './utils.ts';
|
||||
|
||||
test('findUrlAtPosition', () => {
|
||||
const doc = 'visit https://example.com for info';
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import type {EditorView, ViewUpdate} from '@codemirror/view';
|
||||
import type {CodemirrorModules} from './main.ts';
|
||||
import {trimUrlPunctuation, urlRawRegex} from '../../utils/url.ts';
|
||||
|
||||
/** Remove trailing whitespace from all lines in the editor. */
|
||||
export function trimTrailingWhitespaceFromView(view: EditorView): void {
|
||||
@ -15,22 +16,9 @@ export function trimTrailingWhitespaceFromView(view: EditorView): void {
|
||||
if (changes.length) view.dispatch({changes});
|
||||
}
|
||||
|
||||
/** Matches URLs, excluding characters that are never valid unencoded in URLs per RFC 3986. */
|
||||
export const urlRawRegex = /\bhttps?:\/\/[^\s<>[\]]+/gi;
|
||||
|
||||
/** Strip trailing punctuation that is likely not part of the URL. */
|
||||
export function trimUrlPunctuation(url: string): string {
|
||||
url = url.replace(/[.,;:'"]+$/, '');
|
||||
// Strip trailing closing parens only if unbalanced (not part of the URL like Wikipedia links)
|
||||
while (url.endsWith(')') && (url.match(/\(/g) || []).length < (url.match(/\)/g) || []).length) {
|
||||
url = url.slice(0, -1);
|
||||
}
|
||||
return url;
|
||||
}
|
||||
|
||||
/** Find the URL at the given character position in a document string, or null if none. */
|
||||
export function findUrlAtPosition(doc: string, pos: number): string | null {
|
||||
for (const match of doc.matchAll(urlRawRegex)) {
|
||||
for (const match of doc.matchAll(urlRawRegex())) {
|
||||
const url = trimUrlPunctuation(match[0]);
|
||||
if (match.index !== undefined && pos >= match.index && pos < match.index + url.length) {
|
||||
return url;
|
||||
@ -67,7 +55,7 @@ export function goToDefinitionAt(cm: CodemirrorModules, view: EditorView, pos: n
|
||||
export function clickableUrls(cm: CodemirrorModules) {
|
||||
const urlMark = cm.view.Decoration.mark({class: 'cm-url'});
|
||||
const urlDecorator = new cm.view.MatchDecorator({
|
||||
regexp: urlRawRegex,
|
||||
regexp: urlRawRegex(),
|
||||
decorate: (add, from, _to, match) => {
|
||||
const trimmed = trimUrlPunctuation(match[0]);
|
||||
add(from, from + trimmed.length, urlMark);
|
||||
|
||||
@ -1,6 +1,12 @@
|
||||
import {renderAnsi} from './ansi.ts';
|
||||
import {renderAnsiInto} from './ansi.ts';
|
||||
|
||||
test('renderAnsi', () => {
|
||||
const renderAnsi = (line: string) => {
|
||||
const el = document.createElement('div');
|
||||
renderAnsiInto(el, line);
|
||||
return el.innerHTML;
|
||||
};
|
||||
|
||||
expect(renderAnsi('abc')).toEqual('abc');
|
||||
expect(renderAnsi('abc\n')).toEqual('abc');
|
||||
expect(renderAnsi('abc\r\n')).toEqual('abc');
|
||||
@ -20,6 +26,9 @@ test('renderAnsi', () => {
|
||||
|
||||
// URLs in ANSI output become clickable links
|
||||
const link = (url: string) => `<a href="${url}" target="_blank">${url}</a>`;
|
||||
expect(renderAnsi('Downloading https://github.com/actions/upload-artifact/releases')).toEqual(`Downloading ${link('https://github.com/actions/upload-artifact/releases')}`);
|
||||
expect(renderAnsi('\x1b[32mhttps://proxy.golang.org/cached-only\x1b[0m')).toEqual(`<span class="ansi-green-fg">${link('https://proxy.golang.org/cached-only')}</span>`);
|
||||
expect(renderAnsi('foo https://example.com bar')).toEqual(`foo ${link('https://example.com')} bar`);
|
||||
expect(renderAnsi('<https://example.com?a=b&c=d#h>')).toEqual(`<${link('https://example.com?a=b&c=d#h')}>`);
|
||||
expect(renderAnsi('open https://example.com.')).toEqual(`open ${link('https://example.com')}.`);
|
||||
expect(renderAnsi('"https://example.com"')).toEqual(`"${link('https://example.com')}"`);
|
||||
expect(renderAnsi('\x1b[32mhttps://example.com\x1b[0m')).toEqual(`<span class="ansi-green-fg">${link('https://example.com')}</span>`);
|
||||
});
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import {AnsiUp} from 'ansi_up';
|
||||
import {linkifyURLs} from '../utils/url.ts';
|
||||
import {trimUrlPunctuation, urlRawRegex} from '../utils/url.ts';
|
||||
|
||||
const replacements: Array<[RegExp, string]> = [
|
||||
[/\x1b\[\d+[A-H]/g, ''], // Move cursor, treat them as no-op
|
||||
@ -7,7 +7,7 @@ const replacements: Array<[RegExp, string]> = [
|
||||
];
|
||||
|
||||
// render ANSI to HTML
|
||||
export function renderAnsi(line: string): string {
|
||||
export function renderAnsiInto(el: HTMLElement, line: string) {
|
||||
// create a fresh ansi_up instance because otherwise previous renders can influence
|
||||
// the output of future renders, because ansi_up is stateful and remembers things like
|
||||
// unclosed opening tags for colors.
|
||||
@ -44,5 +44,42 @@ export function renderAnsi(line: string): string {
|
||||
result = lines.join('\n');
|
||||
}
|
||||
|
||||
return linkifyURLs(result);
|
||||
el.innerHTML = result;
|
||||
// at the moment, only need to do post-process when there are potential URL links
|
||||
if (result.includes('://')) renderAnsiPostProcessNode(el);
|
||||
}
|
||||
|
||||
/**
 * Linkify the first URL found in a text node.
 *
 * The node is replaced in its parent by up to three siblings: the text before the URL,
 * an `<a target="_blank">` whose href and label are the punctuation-trimmed URL, and the
 * text after it (which starts with any punctuation that was trimmed off the match).
 *
 * @returns the inserted anchor when a URL was found, otherwise the untouched input node.
 *   Only the first URL is handled; the remainder text node is left for the caller to visit.
 */
function renderAnsiProcessText(node: ChildNode): ChildNode {
  const content = node.textContent!;
  const found = urlRawRegex().exec(content);
  if (!found || found.index === undefined) return node;

  // trimming only removes characters from the end, so the trimmed URL is a prefix of the raw match
  const href = trimUrlPunctuation(found[0]);
  const urlStart = found.index;
  const urlEnd = urlStart + href.length;
  const leading = content.slice(0, urlStart);
  const trailing = content.slice(urlEnd);

  const anchor = document.createElement('a');
  anchor.setAttribute('href', href);
  anchor.setAttribute('target', '_blank');
  anchor.textContent = href;

  // empty strings are skipped so that no empty text nodes get inserted
  const replacement: Array<Node | string> = [anchor];
  if (leading) replacement.unshift(leading);
  if (trailing) replacement.push(trailing);

  node.replaceWith(...replacement);
  return anchor;
}
|
||||
|
||||
/**
 * Walk the children of `el` and linkify URLs found in text nodes.
 *
 * Existing `<a>` elements are skipped entirely, other non-text nodes are descended into
 * recursively, and text nodes are handed to `renderAnsiProcessText`.
 */
function renderAnsiPostProcessNode(el: ChildNode) {
  let current = el.firstChild;
  while (current) {
    if (current.nodeName !== 'A') {
      if (current.nodeType === Node.TEXT_NODE) {
        // Continue from the returned node: when a link was inserted, its next sibling is
        // the remaining text, which gets scanned for further URLs on the next iteration.
        current = renderAnsiProcessText(current);
      } else {
        renderAnsiPostProcessNode(current);
      }
    }
    current = current.nextSibling;
  }
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import {linkifyURLs, pathEscape, pathEscapeSegments, urlQueryEscape} from './url.ts';
|
||||
import {pathEscape, pathEscapeSegments, trimUrlPunctuation, urlQueryEscape, urlRawRegex} from './url.ts';
|
||||
|
||||
describe('escape', () => {
|
||||
const queryNonAscii = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
|
||||
@ -19,29 +19,36 @@ describe('escape', () => {
|
||||
});
|
||||
});
|
||||
|
||||
test('linkifyURLs', () => {
|
||||
const link = (url: string) => `<a href="${url}" target="_blank">${url}</a>`;
|
||||
expect(linkifyURLs('https://example.com')).toEqual(link('https://example.com'));
|
||||
expect(linkifyURLs('https://dl.google.com/go/go1.23.6.linux-amd64.tar.gz')).toEqual(link('https://dl.google.com/go/go1.23.6.linux-amd64.tar.gz'));
|
||||
expect(linkifyURLs('https://example.com/path?query=1&b=2#frag')).toEqual(link('https://example.com/path?query=1&b=2#frag'));
|
||||
expect(linkifyURLs('visit https://example.com/repo for info')).toEqual(`visit ${link('https://example.com/repo')} for info`);
|
||||
expect(linkifyURLs('See https://example.com.')).toEqual(`See ${link('https://example.com')}.`);
|
||||
expect(linkifyURLs('https://example.com, and more')).toEqual(`${link('https://example.com')}, and more`);
|
||||
expect(linkifyURLs('<span class="ansi-green-fg">https://proxy.golang.org/cached-only</span>')).toEqual(`<span class="ansi-green-fg">${link('https://proxy.golang.org/cached-only')}</span>`);
|
||||
expect(linkifyURLs('<span style="color:rgb(0,255,0)">https://registry.npmjs.org/@types/node</span>')).toEqual(`<span style="color:rgb(0,255,0)">${link('https://registry.npmjs.org/@types/node')}</span>`);
|
||||
expect(linkifyURLs('https://a.com and https://b.org')).toEqual(`${link('https://a.com')} and ${link('https://b.org')}`);
|
||||
expect(linkifyURLs('no urls here')).toEqual('no urls here');
|
||||
expect(linkifyURLs('http://example.com/path')).toEqual(link('http://example.com/path'));
|
||||
expect(linkifyURLs('http://localhost:3000/repo')).toEqual(link('http://localhost:3000/repo'));
|
||||
expect(linkifyURLs('https://')).toEqual('https://');
|
||||
expect(linkifyURLs('<a href="https://example.com">Click here</a>')).toEqual('<a href="https://example.com">Click here</a>');
|
||||
expect(linkifyURLs('<a\nhref="https://example.com">Click here</a>')).toEqual('<a\nhref="https://example.com">Click here</a>');
|
||||
expect(linkifyURLs('<a href="https://example.com">https://example.com</a>')).toEqual('<a href="https://example.com">https://example.com</a>');
|
||||
expect(linkifyURLs('https://evil.com/<script>alert(1)</script>')).toEqual(`${link('https://evil.com/')}<script>alert(1)</script>`);
|
||||
expect(linkifyURLs('https://evil.com/"onmouseover="alert(1)')).toEqual(`${link('https://evil.com/')}"onmouseover="alert(1)`);
|
||||
expect(linkifyURLs('javascript:alert(1)')).toEqual('javascript:alert(1)'); // eslint-disable-line no-script-url
|
||||
expect(linkifyURLs("https://evil.com/'onclick='alert(1)")).toEqual(`${link('https://evil.com/')}'onclick='alert(1)`);
|
||||
expect(linkifyURLs('data:text/html,<script>alert(1)</script>')).toEqual('data:text/html,<script>alert(1)</script>');
|
||||
expect(linkifyURLs('https://evil.com/\nonclick=alert(1)')).toEqual(`${link('https://evil.com/')}\nonclick=alert(1)`);
|
||||
expect(linkifyURLs('https://evil.com/"onmouseover=alert(1)')).toEqual(`${link('https://evil.com/"onmouseover=alert')}(1)`);
|
||||
test('matchUrls', () => {
  // Collect every raw URL match in `text` and trim trailing punctuation, the same way callers do.
  const matchUrls = (text: string) => Array.from(text.matchAll(urlRawRegex()), (m) => trimUrlPunctuation(m[0]));

  // plain URLs and trailing sentence punctuation
  expect(matchUrls('visit https://example.com for info')).toEqual(['https://example.com']);
  expect(matchUrls('see https://example.com.')).toEqual(['https://example.com']);
  expect(matchUrls('see https://example.com, and')).toEqual(['https://example.com']);
  expect(matchUrls('see https://example.com; and')).toEqual(['https://example.com']);
  expect(matchUrls('(https://example.com)')).toEqual(['https://example.com']);
  expect(matchUrls('"https://example.com"')).toEqual(['https://example.com']);

  // query strings and fragments are part of the URL
  expect(matchUrls('https://example.com/path?q=1&b=2#hash')).toEqual(['https://example.com/path?q=1&b=2#hash']);
  expect(matchUrls('https://example.com/path?q=1&b=2#hash.')).toEqual(['https://example.com/path?q=1&b=2#hash']);
  expect(matchUrls('https://x.co')).toEqual(['https://x.co']);

  // balanced parentheses belong to the URL, unbalanced closing ones do not
  expect(matchUrls('https://example.com/path_(wiki)')).toEqual(['https://example.com/path_(wiki)']);
  expect(matchUrls('https://en.wikipedia.org/wiki/Rust_(programming_language)')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']);
  expect(matchUrls('(https://en.wikipedia.org/wiki/Rust_(programming_language))')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']);

  expect(matchUrls('http://example.com')).toEqual(['http://example.com']);
  expect(matchUrls('no url here')).toEqual([]);
  expect(matchUrls('https://a.com and https://b.com')).toEqual(['https://a.com', 'https://b.com']);

  // markdown badge: an image link nested in a link, so the input carries both URLs
  expect(matchUrls('[![](https://img.shields.io/npm/v/pkg.svg?style=flat)](https://www.npmjs.org/package/pkg)')).toEqual(['https://img.shields.io/npm/v/pkg.svg?style=flat', 'https://www.npmjs.org/package/pkg']);
});
|
||||
|
||||
test('trimUrlPunctuation', () => {
|
||||
expect(trimUrlPunctuation('https://example.com.')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com,')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com;')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com:')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation("https://example.com'")).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com"')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com.,;')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://example.com/path')).toEqual('https://example.com/path');
|
||||
expect(trimUrlPunctuation('https://example.com/path_(wiki)')).toEqual('https://example.com/path_(wiki)');
|
||||
expect(trimUrlPunctuation('https://example.com)')).toEqual('https://example.com');
|
||||
expect(trimUrlPunctuation('https://en.wikipedia.org/wiki/Rust_(lang))')).toEqual('https://en.wikipedia.org/wiki/Rust_(lang)');
|
||||
});
|
||||
|
||||
@ -1,4 +1,15 @@
|
||||
import {html, htmlRaw} from './html.ts';
|
||||
/** Matches URLs, excluding characters that are never valid unencoded in URLs per RFC 3986. */
|
||||
// A regexp with the `g` flag keeps mutable match state (`lastIndex`) between calls,
// so this is a factory: every call returns a fresh instance instead of sharing one.
export const urlRawRegex = () => /\bhttps?:\/\/[^\s<>[\]]+/gi;
|
||||
|
||||
/**
 * Strip trailing characters that are likely sentence punctuation rather than part of the URL.
 *
 * Removed from the end, in any order and any number of times:
 * - the punctuation characters `. , ; : ' "`
 * - closing parentheses, but only while the URL has more `)` than `(`, so balanced
 *   parentheses (e.g. Wikipedia-style `/wiki/Rust_(lang)`) are kept.
 *
 * Both kinds are trimmed in one backward pass, so mixed tails such as `.)` or `).` are
 * fully removed regardless of their order.
 *
 * @param url the raw matched URL text
 * @returns a prefix of `url` with the trailing punctuation removed
 */
export function trimUrlPunctuation(url: string): string {
  const trailingPunctuation = `.,;:'"`;

  // Count once: how many ")" have no matching "(" anywhere in the URL.
  let unbalancedClosing = 0;
  for (let i = 0; i < url.length; i++) {
    const ch = url.charAt(i);
    if (ch === '(') unbalancedClosing--;
    else if (ch === ')') unbalancedClosing++;
  }

  let end = url.length;
  while (end > 0) {
    const last = url.charAt(end - 1);
    if (trailingPunctuation.includes(last)) {
      end--;
    } else if (last === ')' && unbalancedClosing > 0) {
      // dropping an unbalanced ")" brings the URL one step closer to balanced
      unbalancedClosing--;
      end--;
    } else {
      break;
    }
  }
  return url.slice(0, end);
}
|
||||
|
||||
export function urlQueryEscape(s: string) {
|
||||
// See "TestQueryEscape" in backend
|
||||
@ -31,31 +42,3 @@ export function pathEscapeSegments(s: string): string {
|
||||
// The same as backend's PathEscapeSegments
|
||||
return s.split('/').map(pathEscape).join('/');
|
||||
}
|
||||
|
||||
// Match HTML tags (to skip) or URLs (to linkify) in HTML content
|
||||
const urlLinkifyPattern = /(<([-\w]+)[^>]*>)|(<\/([-\w]+)[^>]*>)|(https?:\/\/[^\s<>"'`|(){}[\]]+)/gi;
|
||||
const trailingPunctPattern = /[.,;:!?]+$/;
|
||||
|
||||
// Convert URLs to clickable links in HTML, preserving existing HTML tags
|
||||
export function linkifyURLs(htmlString: string): string {
|
||||
let inAnchor = false;
|
||||
return htmlString.replace(urlLinkifyPattern, (match, _openTagFull, openTag, _closeTagFull, closeTag, url) => {
|
||||
// skip URLs inside existing <a> tags
|
||||
if (openTag === 'a') {
|
||||
inAnchor = true;
|
||||
return match;
|
||||
} else if (closeTag === 'a') {
|
||||
inAnchor = false;
|
||||
return match;
|
||||
}
|
||||
if (inAnchor || !url) {
|
||||
return match;
|
||||
}
|
||||
|
||||
const trailingPunct = url.match(trailingPunctPattern);
|
||||
const cleanUrl = trailingPunct ? url.slice(0, -trailingPunct[0].length) : url;
|
||||
const trailing = trailingPunct ? trailingPunct[0] : '';
|
||||
// safe because regexp only matches valid URLs (no quotes or angle brackets)
|
||||
return html`<a href="${htmlRaw(cleanUrl)}" target="_blank">${htmlRaw(cleanUrl)}</a>${htmlRaw(trailing)}`;
|
||||
});
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user