fix(actions): don't swallow HTML entities into linkified URLs (#38239)

In the Actions log viewer, a double-quoted URL renders with a stray extra `;` after it. Reported in `gitea/runner#1046`. Remove the buggy AI slop `linkifyURLs` and use a new approach to process URLs in text. --------- Signed-off-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
2026-07-20 02:53:00 +02:00 · 2026-06-28 19:37:16 +08:00
parent 5b9251150c
commit ce8cf22af9
7 changed files with 104 additions and 117 deletions
@@ -1,5 +1,5 @@
 import {createElementFromAttrs} from '../utils/dom.ts';
-import {renderAnsi} from '../render/ansi.ts';
+import {renderAnsiInto} from '../render/ansi.ts';
 import {reactive} from 'vue';
 import type {ActionsArtifact, ActionsJob, ActionsRun, ActionsStatus} from '../modules/gitea-actions.ts';
 import type {IntervalId} from '../types.ts';
@@ -80,10 +80,10 @@ export function createLogLineMessage(line: LogLine, cmd: LogLineCommand | null)
  if (label) {
    logMsg.append(createElementFromAttrs('span', {class: 'log-msg-label'}, `${label}:`));
    const msgSpan = document.createElement('span');
-    msgSpan.innerHTML = ` ${renderAnsi(msgContent.trimStart())}`;
+    renderAnsiInto(msgSpan, ` ${msgContent.trimStart()}`);
    logMsg.append(msgSpan);
  } else {
-    logMsg.innerHTML = renderAnsi(msgContent);
+    renderAnsiInto(logMsg, msgContent);
  }
  return logMsg;
 }
@@ -1,41 +1,4 @@
-import {findUrlAtPosition, trimUrlPunctuation, urlRawRegex} from './utils.ts';
-
-function matchUrls(text: string): string[] {
-  return Array.from(text.matchAll(urlRawRegex), (m) => trimUrlPunctuation(m[0]));
-}
-
-test('matchUrls', () => {
-  expect(matchUrls('visit https://example.com for info')).toEqual(['https://example.com']);
-  expect(matchUrls('see https://example.com.')).toEqual(['https://example.com']);
-  expect(matchUrls('see https://example.com, and')).toEqual(['https://example.com']);
-  expect(matchUrls('see https://example.com; and')).toEqual(['https://example.com']);
-  expect(matchUrls('(https://example.com)')).toEqual(['https://example.com']);
-  expect(matchUrls('"https://example.com"')).toEqual(['https://example.com']);
-  expect(matchUrls('https://example.com/path?q=1&b=2#hash')).toEqual(['https://example.com/path?q=1&b=2#hash']);
-  expect(matchUrls('https://example.com/path?q=1&b=2#hash.')).toEqual(['https://example.com/path?q=1&b=2#hash']);
-  expect(matchUrls('https://x.co')).toEqual(['https://x.co']);
-  expect(matchUrls('https://example.com/path_(wiki)')).toEqual(['https://example.com/path_(wiki)']);
-  expect(matchUrls('https://en.wikipedia.org/wiki/Rust_(programming_language)')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']);
-  expect(matchUrls('(https://en.wikipedia.org/wiki/Rust_(programming_language))')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']);
-  expect(matchUrls('http://example.com')).toEqual(['http://example.com']);
-  expect(matchUrls('no url here')).toEqual([]);
-  expect(matchUrls('https://a.com and https://b.com')).toEqual(['https://a.com', 'https://b.com']);
-  expect(matchUrls('[![](https://img.shields.io/npm/v/pkg.svg?style=flat)](https://www.npmjs.org/package/pkg)')).toEqual(['https://img.shields.io/npm/v/pkg.svg?style=flat', 'https://www.npmjs.org/package/pkg']);
-});
-
-test('trimUrlPunctuation', () => {
-  expect(trimUrlPunctuation('https://example.com.')).toEqual('https://example.com');
-  expect(trimUrlPunctuation('https://example.com,')).toEqual('https://example.com');
-  expect(trimUrlPunctuation('https://example.com;')).toEqual('https://example.com');
-  expect(trimUrlPunctuation('https://example.com:')).toEqual('https://example.com');
-  expect(trimUrlPunctuation("https://example.com'")).toEqual('https://example.com');
-  expect(trimUrlPunctuation('https://example.com"')).toEqual('https://example.com');
-  expect(trimUrlPunctuation('https://example.com.,;')).toEqual('https://example.com');
-  expect(trimUrlPunctuation('https://example.com/path')).toEqual('https://example.com/path');
-  expect(trimUrlPunctuation('https://example.com/path_(wiki)')).toEqual('https://example.com/path_(wiki)');
-  expect(trimUrlPunctuation('https://example.com)')).toEqual('https://example.com');
-  expect(trimUrlPunctuation('https://en.wikipedia.org/wiki/Rust_(lang))')).toEqual('https://en.wikipedia.org/wiki/Rust_(lang)');
-});
+import {findUrlAtPosition} from './utils.ts';

 test('findUrlAtPosition', () => {
  const doc = 'visit https://example.com for info';
@@ -1,5 +1,6 @@
 import type {EditorView, ViewUpdate} from '@codemirror/view';
 import type {CodemirrorModules} from './main.ts';
+import {trimUrlPunctuation, urlRawRegex} from '../../utils/url.ts';

 /** Remove trailing whitespace from all lines in the editor. */
 export function trimTrailingWhitespaceFromView(view: EditorView): void {
@@ -15,22 +16,9 @@ export function trimTrailingWhitespaceFromView(view: EditorView): void {
  if (changes.length) view.dispatch({changes});
 }

-/** Matches URLs, excluding characters that are never valid unencoded in URLs per RFC 3986. */
-export const urlRawRegex = /\bhttps?:\/\/[^\s<>[\]]+/gi;
-
-/** Strip trailing punctuation that is likely not part of the URL. */
-export function trimUrlPunctuation(url: string): string {
-  url = url.replace(/[.,;:'"]+$/, '');
-  // Strip trailing closing parens only if unbalanced (not part of the URL like Wikipedia links)
-  while (url.endsWith(')') && (url.match(/\(/g) || []).length < (url.match(/\)/g) || []).length) {
-    url = url.slice(0, -1);
-  }
-  return url;
-}
-
 /** Find the URL at the given character position in a document string, or null if none. */
 export function findUrlAtPosition(doc: string, pos: number): string | null {
-  for (const match of doc.matchAll(urlRawRegex)) {
+  for (const match of doc.matchAll(urlRawRegex())) {
    const url = trimUrlPunctuation(match[0]);
    if (match.index !== undefined && pos >= match.index && pos < match.index + url.length) {
      return url;
@@ -67,7 +55,7 @@ export function goToDefinitionAt(cm: CodemirrorModules, view: EditorView, pos: n
 export function clickableUrls(cm: CodemirrorModules) {
  const urlMark = cm.view.Decoration.mark({class: 'cm-url'});
  const urlDecorator = new cm.view.MatchDecorator({
-    regexp: urlRawRegex,
+    regexp: urlRawRegex(),
    decorate: (add, from, _to, match) => {
      const trimmed = trimUrlPunctuation(match[0]);
      add(from, from + trimmed.length, urlMark);
@@ -1,6 +1,12 @@
-import {renderAnsi} from './ansi.ts';
+import {renderAnsiInto} from './ansi.ts';

 test('renderAnsi', () => {
+  const renderAnsi = (line: string) => {
+    const el = document.createElement('div');
+    renderAnsiInto(el, line);
+    return el.innerHTML;
+  };
+
  expect(renderAnsi('abc')).toEqual('abc');
  expect(renderAnsi('abc\n')).toEqual('abc');
  expect(renderAnsi('abc\r\n')).toEqual('abc');
@@ -20,6 +26,9 @@ test('renderAnsi', () => {

  // URLs in ANSI output become clickable links
  const link = (url: string) => `<a href="${url}" target="_blank">${url}</a>`;
-  expect(renderAnsi('Downloading https://github.com/actions/upload-artifact/releases')).toEqual(`Downloading ${link('https://github.com/actions/upload-artifact/releases')}`);
-  expect(renderAnsi('\x1b[32mhttps://proxy.golang.org/cached-only\x1b[0m')).toEqual(`<span class="ansi-green-fg">${link('https://proxy.golang.org/cached-only')}</span>`);
+  expect(renderAnsi('foo https://example.com bar')).toEqual(`foo ${link('https://example.com')} bar`);
+  expect(renderAnsi('<https://example.com?a=b&c=d#h>')).toEqual(`&lt;${link('https://example.com?a=b&amp;c=d#h')}&gt;`);
+  expect(renderAnsi('open https://example.com.')).toEqual(`open ${link('https://example.com')}.`);
+  expect(renderAnsi('"https://example.com"')).toEqual(`"${link('https://example.com')}"`);
+  expect(renderAnsi('\x1b[32mhttps://example.com\x1b[0m')).toEqual(`<span class="ansi-green-fg">${link('https://example.com')}</span>`);
 });
@@ -1,5 +1,5 @@
 import {AnsiUp} from 'ansi_up';
-import {linkifyURLs} from '../utils/url.ts';
+import {trimUrlPunctuation, urlRawRegex} from '../utils/url.ts';

 const replacements: Array<[RegExp, string]> = [
  [/\x1b\[\d+[A-H]/g, ''], // Move cursor, treat them as no-op
@@ -7,7 +7,7 @@ const replacements: Array<[RegExp, string]> = [
 ];

 // render ANSI to HTML
-export function renderAnsi(line: string): string {
+export function renderAnsiInto(el: HTMLElement, line: string) {
  // create a fresh ansi_up instance because otherwise previous renders can influence
  // the output of future renders, because ansi_up is stateful and remembers things like
  // unclosed opening tags for colors.
@@ -44,5 +44,42 @@ export function renderAnsi(line: string): string {
    result = lines.join('\n');
  }

-  return linkifyURLs(result);
+  el.innerHTML = result;
+  // at the moment, only need to do post-process when there are potential URL links
+  if (result.includes('://')) renderAnsiPostProcessNode(el);
+}
+
+function renderAnsiProcessText(node: ChildNode): ChildNode {
+  const text = node.textContent!;
+  const match = urlRawRegex().exec(text);
+  if (!match || match.index === undefined) return node;
+
+  const before = text.slice(0, match.index);
+  const urlMatched = match[0];
+  const urlTrimmed = trimUrlPunctuation(urlMatched);
+  const after = text.slice(match.index + urlMatched.length - (urlMatched.length - urlTrimmed.length));
+
+  const link = document.createElement('a');
+  link.setAttribute('href', urlTrimmed);
+  link.setAttribute('target', '_blank');
+  link.textContent = urlTrimmed;
+
+  const newNodes: Array<Node | string> = [];
+  if (before) newNodes.push(before);
+  newNodes.push(link);
+  if (after) newNodes.push(after);
+
+  node.replaceWith(...newNodes);
+  return link;
+}
+
+function renderAnsiPostProcessNode(el: ChildNode) {
+  for (let node = el.firstChild; node; node = node.nextSibling) {
+    if (node.nodeName === 'A') continue;
+    if (node.nodeType !== Node.TEXT_NODE) {
+      renderAnsiPostProcessNode(node);
+      continue;
+    }
+    node = renderAnsiProcessText(node);
+  }
 }
@@ -1,4 +1,4 @@
-import {linkifyURLs, pathEscape, pathEscapeSegments, urlQueryEscape} from './url.ts';
+import {pathEscape, pathEscapeSegments, trimUrlPunctuation, urlQueryEscape, urlRawRegex} from './url.ts';

 describe('escape', () => {
  const queryNonAscii = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
@@ -19,29 +19,36 @@ describe('escape', () => {
  });
 });

-test('linkifyURLs', () => {
-  const link = (url: string) => `<a href="${url}" target="_blank">${url}</a>`;
-  expect(linkifyURLs('https://example.com')).toEqual(link('https://example.com'));
-  expect(linkifyURLs('https://dl.google.com/go/go1.23.6.linux-amd64.tar.gz')).toEqual(link('https://dl.google.com/go/go1.23.6.linux-amd64.tar.gz'));
-  expect(linkifyURLs('https://example.com/path?query=1&amp;b=2#frag')).toEqual(link('https://example.com/path?query=1&amp;b=2#frag'));
-  expect(linkifyURLs('visit https://example.com/repo for info')).toEqual(`visit ${link('https://example.com/repo')} for info`);
-  expect(linkifyURLs('See https://example.com.')).toEqual(`See ${link('https://example.com')}.`);
-  expect(linkifyURLs('https://example.com, and more')).toEqual(`${link('https://example.com')}, and more`);
-  expect(linkifyURLs('<span class="ansi-green-fg">https://proxy.golang.org/cached-only</span>')).toEqual(`<span class="ansi-green-fg">${link('https://proxy.golang.org/cached-only')}</span>`);
-  expect(linkifyURLs('<span style="color:rgb(0,255,0)">https://registry.npmjs.org/@types/node</span>')).toEqual(`<span style="color:rgb(0,255,0)">${link('https://registry.npmjs.org/@types/node')}</span>`);
-  expect(linkifyURLs('https://a.com and https://b.org')).toEqual(`${link('https://a.com')} and ${link('https://b.org')}`);
-  expect(linkifyURLs('no urls here')).toEqual('no urls here');
-  expect(linkifyURLs('http://example.com/path')).toEqual(link('http://example.com/path'));
-  expect(linkifyURLs('http://localhost:3000/repo')).toEqual(link('http://localhost:3000/repo'));
-  expect(linkifyURLs('https://')).toEqual('https://');
-  expect(linkifyURLs('<a href="https://example.com">Click here</a>')).toEqual('<a href="https://example.com">Click here</a>');
-  expect(linkifyURLs('<a\nhref="https://example.com">Click here</a>')).toEqual('<a\nhref="https://example.com">Click here</a>');
-  expect(linkifyURLs('<a href="https://example.com">https://example.com</a>')).toEqual('<a href="https://example.com">https://example.com</a>');
-  expect(linkifyURLs('https://evil.com/<script>alert(1)</script>')).toEqual(`${link('https://evil.com/')}<script>alert(1)</script>`);
-  expect(linkifyURLs('https://evil.com/"onmouseover="alert(1)')).toEqual(`${link('https://evil.com/')}"onmouseover="alert(1)`);
-  expect(linkifyURLs('javascript:alert(1)')).toEqual('javascript:alert(1)'); // eslint-disable-line no-script-url
-  expect(linkifyURLs("https://evil.com/'onclick='alert(1)")).toEqual(`${link('https://evil.com/')}'onclick='alert(1)`);
-  expect(linkifyURLs('data:text/html,<script>alert(1)</script>')).toEqual('data:text/html,<script>alert(1)</script>');
-  expect(linkifyURLs('https://evil.com/\nonclick=alert(1)')).toEqual(`${link('https://evil.com/')}\nonclick=alert(1)`);
-  expect(linkifyURLs('https://evil.com/&#34;onmouseover=alert(1)')).toEqual(`${link('https://evil.com/&#34;onmouseover=alert')}(1)`);
+test('matchUrls', () => {
+  const matchUrls = (text: string) => Array.from(text.matchAll(urlRawRegex()), (m) => trimUrlPunctuation(m[0]));
+  expect(matchUrls('visit https://example.com for info')).toEqual(['https://example.com']);
+  expect(matchUrls('see https://example.com.')).toEqual(['https://example.com']);
+  expect(matchUrls('see https://example.com, and')).toEqual(['https://example.com']);
+  expect(matchUrls('see https://example.com; and')).toEqual(['https://example.com']);
+  expect(matchUrls('(https://example.com)')).toEqual(['https://example.com']);
+  expect(matchUrls('"https://example.com"')).toEqual(['https://example.com']);
+  expect(matchUrls('https://example.com/path?q=1&b=2#hash')).toEqual(['https://example.com/path?q=1&b=2#hash']);
+  expect(matchUrls('https://example.com/path?q=1&b=2#hash.')).toEqual(['https://example.com/path?q=1&b=2#hash']);
+  expect(matchUrls('https://x.co')).toEqual(['https://x.co']);
+  expect(matchUrls('https://example.com/path_(wiki)')).toEqual(['https://example.com/path_(wiki)']);
+  expect(matchUrls('https://en.wikipedia.org/wiki/Rust_(programming_language)')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']);
+  expect(matchUrls('(https://en.wikipedia.org/wiki/Rust_(programming_language))')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']);
+  expect(matchUrls('http://example.com')).toEqual(['http://example.com']);
+  expect(matchUrls('no url here')).toEqual([]);
+  expect(matchUrls('https://a.com and https://b.com')).toEqual(['https://a.com', 'https://b.com']);
+  expect(matchUrls('[![](https://img.shields.io/npm/v/pkg.svg?style=flat)](https://www.npmjs.org/package/pkg)')).toEqual(['https://img.shields.io/npm/v/pkg.svg?style=flat', 'https://www.npmjs.org/package/pkg']);
+});
+
+test('trimUrlPunctuation', () => {
+  expect(trimUrlPunctuation('https://example.com.')).toEqual('https://example.com');
+  expect(trimUrlPunctuation('https://example.com,')).toEqual('https://example.com');
+  expect(trimUrlPunctuation('https://example.com;')).toEqual('https://example.com');
+  expect(trimUrlPunctuation('https://example.com:')).toEqual('https://example.com');
+  expect(trimUrlPunctuation("https://example.com'")).toEqual('https://example.com');
+  expect(trimUrlPunctuation('https://example.com"')).toEqual('https://example.com');
+  expect(trimUrlPunctuation('https://example.com.,;')).toEqual('https://example.com');
+  expect(trimUrlPunctuation('https://example.com/path')).toEqual('https://example.com/path');
+  expect(trimUrlPunctuation('https://example.com/path_(wiki)')).toEqual('https://example.com/path_(wiki)');
+  expect(trimUrlPunctuation('https://example.com)')).toEqual('https://example.com');
+  expect(trimUrlPunctuation('https://en.wikipedia.org/wiki/Rust_(lang))')).toEqual('https://en.wikipedia.org/wiki/Rust_(lang)');
 });
@@ -1,4 +1,15 @@
-import {html, htmlRaw} from './html.ts';
+/** Matches URLs, excluding characters that are never valid unencoded in URLs per RFC 3986. */
+export const urlRawRegex = () => /\bhttps?:\/\/[^\s<>[\]]+/gi; // JS regexp has internal states, so always use a new instance
+
+/** Strip trailing punctuation that is likely not part of the URL. */
+export function trimUrlPunctuation(url: string): string {
+  url = url.replace(/[.,;:'"]+$/, '');
+  // Strip trailing closing parens only if unbalanced (not part of the URL like Wikipedia links)
+  while (url.endsWith(')') && (url.match(/\(/g) || []).length < (url.match(/\)/g) || []).length) {
+    url = url.slice(0, -1);
+  }
+  return url;
+}

 export function urlQueryEscape(s: string) {
  // See "TestQueryEscape" in backend
@@ -31,31 +42,3 @@ export function pathEscapeSegments(s: string): string {
  // The same as backend's PathEscapeSegments
  return s.split('/').map(pathEscape).join('/');
 }
-
-// Match HTML tags (to skip) or URLs (to linkify) in HTML content
-const urlLinkifyPattern = /(<([-\w]+)[^>]*>)|(<\/([-\w]+)[^>]*>)|(https?:\/\/[^\s<>"'`|(){}[\]]+)/gi;
-const trailingPunctPattern = /[.,;:!?]+$/;
-
-// Convert URLs to clickable links in HTML, preserving existing HTML tags
-export function linkifyURLs(htmlString: string): string {
-  let inAnchor = false;
-  return htmlString.replace(urlLinkifyPattern, (match, _openTagFull, openTag, _closeTagFull, closeTag, url) => {
-    // skip URLs inside existing <a> tags
-    if (openTag === 'a') {
-      inAnchor = true;
-      return match;
-    } else if (closeTag === 'a') {
-      inAnchor = false;
-      return match;
-    }
-    if (inAnchor || !url) {
-      return match;
-    }
-
-    const trailingPunct = url.match(trailingPunctPattern);
-    const cleanUrl = trailingPunct ? url.slice(0, -trailingPunct[0].length) : url;
-    const trailing = trailingPunct ? trailingPunct[0] : '';
-    // safe because regexp only matches valid URLs (no quotes or angle brackets)
-    return html`<a href="${htmlRaw(cleanUrl)}" target="_blank">${htmlRaw(cleanUrl)}</a>${htmlRaw(trailing)}`;
-  });
-}