0
0
mirror of https://github.com/go-gitea/gitea.git synced 2026-05-11 02:54:05 +02:00

feat(editor): broaden language detection in web code editor (#37619)

Use
https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml
to substantially improve syntax highlighting in CodeMirror. The file is
generated on demand only.

Signed-off-by: silverwind <me@silverwind.io>
Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com>
This commit is contained in:
silverwind 2026-05-10 06:51:46 +02:00 committed by GitHub
parent 0a3aaeafe7
commit a61598884f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 1513 additions and 40 deletions

View File

@ -661,6 +661,10 @@ generate-gitignore: ## update gitignore files
generate-images: | node_modules ## generate images
cd tools && node generate-images.ts $(TAGS)
.PHONY: generate-codemirror-languages
generate-codemirror-languages: | node_modules ## generate codemirror languages
node tools/generate-codemirror-languages.ts
.PHONY: generate-manpage
generate-manpage: ## generate manpage
@[ -f gitea ] || make backend

1277
assets/codemirror-languages.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -570,8 +570,6 @@ export default defineConfig([
'no-redeclare': [0], // must be disabled for typescript overloads
'no-regex-spaces': [2],
'no-restricted-exports': [0],
'no-restricted-globals': [2, ...restrictedGlobals],
'no-restricted-properties': [2, ...restrictedProperties],
'no-restricted-imports': [2, {paths: [
{name: 'jquery', message: 'Use the global $ instead', allowTypeImports: true},
]}],
@ -1022,5 +1020,9 @@ export default defineConfig([
{
files: ['web_src/**/*'],
languageOptions: {globals: {...globals.browser, ...globals.jquery}},
rules: {
'no-restricted-globals': [2, ...restrictedGlobals],
'no-restricted-properties': [2, ...restrictedProperties],
},
},
]);

View File

@ -192,12 +192,13 @@ func isViteDevRequest(req *http.Request) bool {
// Vite uses a path relative to project root and adds "?import" to non-JS/CSS asset imports:
// - {WebSite}/public/assets/... (e.g. SVG icons from "{RepoRoot}/public/assets/img/svg/")
// - {WebSite}/assets/emoji.json: it is an exception for the frontend assets, it is imported by JS code, but:
// - {WebSite}/assets/<file>.json: exception for frontend-imported repo-root assets:
// - KEEP IN MIND: all static frontend assets are served from "{AssetFS}/assets" to "{WebSite}/assets" by Gitea Web Server
// - "{AssetFS}" is a layered filesystem from "{RepoRoot}/public" or embedded assets, and user's custom files in "{CustomPath}/public"
// - "{RepoRoot}/assets/emoji.json" just happens to have the dir name "assets", it is not related to frontend assets
// - "{RepoRoot}/assets/*.json" just happens to live under the dir name "assets"; it is not related to frontend assets
// - BAD DESIGN: indeed it is a "conflicted and polluted name" sample
if path == "/assets/emoji.json" {
switch path {
case "/assets/emoji.json", "/assets/codemirror-languages.json":
return true
}
return false

View File

@ -0,0 +1,95 @@
#!/usr/bin/env node
import {load as parseYaml} from 'js-yaml';
import {writeFile} from 'node:fs/promises';
import {languages as cmLanguages} from '@codemirror/language-data';
// Source of truth for language metadata: Linguist's languages.yml on the main branch.
// It is downloaded every time this script runs.
const linguistUrl = 'https://raw.githubusercontent.com/github-linguist/linguist/main/lib/linguist/languages.yml';
// Linguist language name -> CodeMirror language name, for entries that should map to a
// specific CodeMirror language. Checked before the name/alias lookup.
const renames: Record<string, string> = {
'Protocol Buffer': 'ProtoBuf',
};
// Languages whose entry is constructed manually in the runtime; skip during generation.
// Compared against the resolved CodeMirror name, not the Linguist name.
const skipNames = new Set(['Dockerfile', 'Markdown']);
// Extensions claimed by several unrelated languages with no good default; strip globally.
// Entries are written without the leading dot.
const ambiguousExt = new Set(['cgi', 'fcgi', 'inc']);
// Per-language drops for non-text formats (.frm = binary VB6 forms) or where Linguist's
// primary owner conflicts with a more specialised CodeMirror mode (.spec → RPM Spec).
// Keyed by the Linguist language name; values are extensions without the leading dot.
const excludeExt: Record<string, string[]> = {
'INI': ['frm'],
'Python': ['spec'],
'Ruby': ['spec'],
};
// Per-CM-language additions for filenames Linguist classifies as separate languages
// (.editorconfig, .gitconfig, .npmrc) or omits entirely (Snakefile).
// Keyed by the CodeMirror language name; appended after Linguist's own filenames.
const extraFilenames: Record<string, string[]> = {
'Properties files': ['.editorconfig', '.gitconfig', '.npmrc'],
'Python': ['Snakefile'],
};
// Per-CM-language additions widely used in practice but absent from Linguist's list.
// Keyed by the CodeMirror language name; appended after the filtered Linguist extensions
// and not subject to the ambiguity/ownership filters.
const extraExtensions: Record<string, string[]> = {
'Properties files': ['conf'],
};
/** The fields of a languages.yml entry that this script declares. */
type LinguistEntry = {
// Declared for completeness; not read anywhere in this script.
type: string;
// File extensions; one leading "." is stripped from each before use.
extensions?: string[];
// Exact filenames; copied to the output unchanged.
filenames?: string[];
};
/** One record of the array written to assets/codemirror-languages.json. */
type CmLanguage = {
// CodeMirror language name (not the Linguist name).
name: string;
// Extensions without a leading dot.
extensions: string[];
// Exact filenames, including any leading dot (e.g. ".editorconfig").
filenames: string[];
};
// Download and parse Linguist's language list; abort on any non-2xx response.
const response = await fetch(linguistUrl);
if (!response.ok) throw new Error(`fetch ${linguistUrl} failed: ${response.status}`);
const linguist = parseYaml(await response.text()) as Record<string, LinguistEntry>;

// Lower-cased CodeMirror name or alias -> canonical CodeMirror language name.
const cmByAlias = new Map<string, string>();
// Map of extension -> the CM language that originally owns it. Used to prevent Linguist
// from broadening one language's extension claim into another's territory (e.g. Linguist's
// PLSQL lists .sql, but CM's SQL is the canonical owner).
const cmOriginalExtOwner = new Map<string, string>();
for (const {name, alias, extensions} of cmLanguages) {
  // The canonical name is registered first, then each alias, all pointing at the same language.
  for (const key of [name, ...alias]) {
    cmByAlias.set(key.toLowerCase(), name);
  }
  // First language to list an extension keeps ownership of it.
  for (const ext of extensions) {
    if (cmOriginalExtOwner.has(ext)) continue;
    cmOriginalExtOwner.set(ext, name);
  }
}
// Build one output record per CodeMirror language that Linguist knows about.
const out: CmLanguage[] = [];
const seen = new Set<string>();
for (const [linguistName, entry] of Object.entries(linguist)) {
  const cmName = renames[linguistName] ?? cmByAlias.get(linguistName.toLowerCase());
  // Multiple Linguist entries can alias to the same CM language (e.g. JSON5 → JSON).
  // Only the first entry encountered contributes; later ones are ignored.
  if (!cmName || skipNames.has(cmName) || seen.has(cmName)) continue;
  seen.add(cmName);
  const exExt = new Set(excludeExt[linguistName]);
  // CodeMirror's matchFilename uses /\.([^.]+)$/, so multi-dot extensions like
  // ".cmake.in" can't match as extensions and are dropped here.
  const extensions = (entry.extensions ?? [])
    .map((e) => e.replace(/^\./, ''))
    .filter((e) => {
      if (e.includes('.') || ambiguousExt.has(e) || exExt.has(e)) return false;
      // Keep an extension only if no CM language claims it, or this one does.
      const owner = cmOriginalExtOwner.get(e);
      return !owner || owner === cmName;
    });
  out.push({
    name: cmName,
    // De-duplicate while preserving order, so an "extra" that Linguist also lists
    // (now or after an upstream change) is not emitted twice.
    extensions: [...new Set([...extensions, ...(extraExtensions[cmName] ?? [])])],
    filenames: [...new Set([...(entry.filenames ?? []), ...(extraFilenames[cmName] ?? [])])],
  });
}
// Pin the collation locale: without it localeCompare uses the host's default locale,
// so regenerating the committed JSON on another machine could reorder entries.
out.sort((a, b) => a.name.localeCompare(b.name, 'en'));
const outPath = new URL('../assets/codemirror-languages.json', import.meta.url);
await writeFile(outPath, `${JSON.stringify(out, null, 2)}\n`);
console.info(`wrote ${out.length} languages to ${outPath.pathname}`);

View File

@ -0,0 +1,54 @@
import {buildLanguageDescriptions, importCodemirror} from './main.ts';
// Checks filename -> language resolution against the list built by buildLanguageDescriptions.
// Cases run in table order; an `undefined` expectation means "no language matched".
test('matchFilename — language detection covers extended rules', async () => {
  const cm = await importCodemirror();
  const descriptions = buildLanguageDescriptions(cm);
  const detect = (filename: string) => {
    return cm.language.LanguageDescription.matchFilename(descriptions, filename)?.name;
  };

  const cases: Array<[filename: string, expected: string | undefined]> = [
    // Linguist-supplied filenames + extensions
    ['.bashrc', 'Shell'],
    ['PKGBUILD', 'Shell'],
    ['foo.zsh', 'Shell'],
    ['Cargo.lock', 'TOML'],
    ['Gemfile', 'Ruby'],
    ['foo.gemspec', 'Ruby'],
    ['foo.psgi', 'Perl'],
    ['foo.pyi', 'Python'],
    ['foo.webmanifest', 'JSON'],
    ['foo.tcc', 'C++'],
    // Script-side extras (extraFilenames / extraExtensions)
    ['.editorconfig', 'Properties files'],
    ['foo.conf', 'Properties files'],
    ['Snakefile', 'Python'],
    // Custom Gitea entries override language-data
    ['Containerfile.test', 'Dockerfile'],
    ['Dockerfile.dev', 'Dockerfile'],
    ['Makefile.am', 'Makefile'],
    ['foo.mk', 'Makefile'],
    ['.env.local', 'Dotenv'],
    ['foo.json5', 'JSON5'],
    ['foo.mdown', 'Markdown'],
    // Filename regex wins over extension match
    ['nginx.conf', 'Nginx'],
    // .spec routes to RPM Spec via excludeExt redirect
    ['foo.spec', 'RPM Spec'],
    // CM original ownership preserved against Linguist's broader claims (.sql is SQL,
    // not PLSQL, even though Linguist's PLSQL extension list includes it).
    ['foo.sql', 'SQL'],
    ['foo.h', 'C'],
    ['foo.mm', 'Objective-C++'],
    // Globally ambiguous extensions fall through to plain text
    ['foo.cgi', undefined],
    ['foo.inc', undefined],
    // Smoke: existing language-data entries still resolve
    ['foo.go', 'Go'],
    ['foo.tsx', 'TSX'],
  ];

  for (const [filename, expected] of cases) {
    if (expected === undefined) {
      expect(detect(filename)).toBeUndefined();
    } else {
      expect(detect(filename)).toBe(expected);
    }
  }
});

View File

@ -41,10 +41,12 @@ export type CodemirrorEditor = {
};
};
// Shape assumed for each entry of assets/codemirror-languages.json; importCodemirror()
// casts the JSON module's default export to an array of this type without validating it.
type LinguistLanguage = {name: string; extensions: string[]; filenames: string[]};
export type CodemirrorModules = Awaited<ReturnType<typeof importCodemirror>>;
async function importCodemirror() {
const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap] = await Promise.all([
export async function importCodemirror() {
const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguist] = await Promise.all([
import('@codemirror/autocomplete'),
import('@codemirror/commands'),
import('@codemirror/language'),
@ -56,8 +58,77 @@ async function importCodemirror() {
import('@lezer/highlight'),
import('@replit/codemirror-indentation-markers'),
import('@replit/codemirror-vscode-keymap'),
import('../../../../assets/codemirror-languages.json'),
]);
return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap};
return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguistLanguages: linguist.default as LinguistLanguage[]};
}
/** Backslash-escapes every regular-expression metacharacter in `s` so it matches literally. */
const escapeRegex = (s: string): string => {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
};

/**
 * Builds a single anchored regex that matches exactly one of the given filenames.
 * Returns undefined for an empty list, so callers can pass the result straight through
 * as an optional `filename` pattern.
 */
const filenameUnion = (filenames: string[]): RegExp | undefined => {
  if (filenames.length === 0) return undefined;
  const alternatives = filenames.map((name) => escapeRegex(name)).join('|');
  return new RegExp(`^(${alternatives})$`);
};
/**
 * Assembles the ordered list of language descriptions used for filename matching:
 * the base languages from buildBaseLanguages() first, followed by the hand-written
 * entries below. The returned array is a fresh one on every call.
 */
export function buildLanguageDescriptions(cm: CodemirrorModules): LanguageDescription[] {
  const {language} = cm;

  // Makefile and Dotenv are both highlighted with the legacy shell stream mode.
  const loadShellMode = async () => {
    const {shell} = await import('@codemirror/legacy-modes/mode/shell');
    return new language.LanguageSupport(language.StreamLanguage.define(shell));
  };

  const list: LanguageDescription[] = [...buildBaseLanguages(cm)];
  list.push(
    language.LanguageDescription.of({
      name: 'Markdown',
      extensions: ['md', 'markdown', 'mkd', 'mdown', 'mdwn', 'mkdn', 'mkdown'],
      // `list` is read lazily when the language loads, so fenced code blocks can use
      // every description in the finished list, including the ones added after this entry.
      load: async () => {
        const {markdown} = await import('@codemirror/lang-markdown');
        return markdown({codeLanguages: list});
      },
    }),
    language.LanguageDescription.of({
      name: 'Dockerfile',
      extensions: ['dockerfile', 'containerfile'],
      filename: /^(Containerfile|Dockerfile)(\..+)?$/i,
      load: async () => {
        const {dockerFile} = await import('@codemirror/legacy-modes/mode/dockerfile');
        return new language.LanguageSupport(language.StreamLanguage.define(dockerFile));
      },
    }),
    language.LanguageDescription.of({
      name: 'Elixir',
      extensions: ['ex', 'exs'],
      load: async () => {
        const {elixir} = await import('codemirror-lang-elixir');
        return elixir();
      },
    }),
    language.LanguageDescription.of({
      name: 'Nix',
      extensions: ['nix'],
      load: async () => {
        const {nix} = await import('@replit/codemirror-lang-nix');
        return nix();
      },
    }),
    language.LanguageDescription.of({
      name: 'Svelte',
      extensions: ['svelte'],
      load: async () => {
        const {svelte} = await import('@replit/codemirror-lang-svelte');
        return svelte();
      },
    }),
    language.LanguageDescription.of({
      name: 'Makefile',
      extensions: ['mk', 'mak', 'make'],
      filename: /^(GNU|BSD)?[Mm]akefile(\..+)?$/,
      load: loadShellMode,
    }),
    language.LanguageDescription.of({
      name: 'Dotenv',
      extensions: ['env'],
      filename: /^\.env(\..*)?$/,
      load: loadShellMode,
    }),
    language.LanguageDescription.of({
      name: 'JSON5',
      extensions: ['json5', 'jsonc'],
      load: async () => {
        const {json} = await import('@codemirror/lang-json');
        return json();
      },
    }),
  );
  return list;
}
// Languages that the JSON omits because buildLanguageDescriptions constructs them manually.
const customNames = new Set(['Dockerfile', 'Markdown']);

// Result of the first buildBaseLanguages() call; reused by every later call.
let baseLanguagesCache: LanguageDescription[] | null = null;

/**
 * Returns the base language list: language-data entries re-described with the
 * extensions/filenames from the generated JSON, followed by the remaining language-data
 * entries that have neither a JSON override nor a manual definition.
 *
 * The list is computed once per module and cached, so the `cm` passed on the first call
 * determines the result for all subsequent calls.
 */
function buildBaseLanguages(cm: CodemirrorModules): LanguageDescription[] {
  if (baseLanguagesCache !== null) return baseLanguagesCache;

  // Language name -> loader of the stock language-data description, bound to that
  // description so it can be invoked from the replacement description.
  const loaders = new Map<string, LanguageDescription['load']>();
  for (const stock of cm.languageData.languages) {
    loaders.set(stock.name, stock.load.bind(stock));
  }

  // JSON entries are used only when language-data provides a loader for that name.
  const overrides: LanguageDescription[] = [];
  const overriddenNames = new Set<string>();
  for (const {name, extensions, filenames} of cm.linguistLanguages) {
    const load = loaders.get(name);
    if (!load) continue;
    overrides.push(cm.language.LanguageDescription.of({
      name,
      extensions,
      filename: filenameUnion(filenames),
      load,
    }));
    overriddenNames.add(name);
  }

  const untouched = cm.languageData.languages.filter(
    (stock: LanguageDescription) => !(overriddenNames.has(stock.name) || customNames.has(stock.name)),
  );

  baseLanguagesCache = [...overrides, ...untouched];
  return baseLanguagesCache;
}
function togglePreviewDisplay(previewable: boolean): void {
@ -85,38 +156,7 @@ export async function createCodeEditor(textarea: HTMLTextAreaElement, filenameIn
const previewableExts = new Set(config.previewableExtensions || []);
const lineWrapExts = config.lineWrapExtensions || [];
const cm = await importCodemirror();
const languageDescriptions: LanguageDescription[] = [
...cm.languageData.languages.filter((l: LanguageDescription) => l.name !== 'Markdown'),
cm.language.LanguageDescription.of({
name: 'Markdown', extensions: ['md', 'markdown', 'mkd'],
load: async () => (await import('@codemirror/lang-markdown')).markdown({codeLanguages: languageDescriptions}),
}),
cm.language.LanguageDescription.of({
name: 'Elixir', extensions: ['ex', 'exs'],
load: async () => (await import('codemirror-lang-elixir')).elixir(),
}),
cm.language.LanguageDescription.of({
name: 'Nix', extensions: ['nix'],
load: async () => (await import('@replit/codemirror-lang-nix')).nix(),
}),
cm.language.LanguageDescription.of({
name: 'Svelte', extensions: ['svelte'],
load: async () => (await import('@replit/codemirror-lang-svelte')).svelte(),
}),
cm.language.LanguageDescription.of({
name: 'Makefile', filename: /^(GNUm|M|m)akefile$/,
load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)),
}),
cm.language.LanguageDescription.of({
name: 'Dotenv', extensions: ['env'], filename: /^\.env(\..*)?$/,
load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)),
}),
cm.language.LanguageDescription.of({
name: 'JSON5', extensions: ['json5', 'jsonc'],
load: async () => (await import('@codemirror/lang-json')).json(),
}),
];
const languageDescriptions = buildLanguageDescriptions(cm);
const matchedLang = cm.language.LanguageDescription.matchFilename(languageDescriptions, config.filename);
const container = document.createElement('div');