0
0
mirror of https://github.com/go-gitea/gitea.git synced 2026-05-10 05:21:54 +02:00

feat(editor): broaden language detection in web code editor

The CodeMirror language registry only ships a narrow set of extensions
and filenames per language, so common config and DSL files (.gitconfig,
Brewfile, Vagrantfile, Containerfile, Cargo.lock, *.gemspec, *.tcc,
Snakefile, etc.) render as plain text in the file editor.

Pull authoritative extension/filename data from github-linguist via a
new `make generate-codemirror-languages` script, write a curated subset
to `assets/codemirror-languages.json`, and wire it into the editor as
overrides on top of `@codemirror/language-data`. A small set of manual
entries covers files that Linguist classifies under separate languages
(.editorconfig, .gitconfig, .npmrc) or doesn't list at all (*.conf,
Snakefile, Containerfile.*, Dockerfile.*, Makefile.am, BSDmakefile).

The derived data structures are memoised at module scope so the work
runs once per page session, and the JSON moves into the dynamic
`importCodemirror()` chunk so it doesn't bloat the entry bundle.

Co-Authored-By: Claude (Opus 4.7) <noreply@anthropic.com>
This commit is contained in:
silverwind 2026-05-09 06:44:18 +02:00
parent a5d81d9ce2
commit b768078175
No known key found for this signature in database
GPG Key ID: 2E62B41C93869443
4 changed files with 1207 additions and 5 deletions

View File

@ -661,6 +661,10 @@ generate-gitignore: ## update gitignore files
generate-images: | node_modules ## generate images
cd tools && node generate-images.ts $(TAGS)
.PHONY: generate-codemirror-languages
generate-codemirror-languages: | node_modules ## refresh assets/codemirror-languages.json from github-linguist
node tools/generate-codemirror-languages.ts
.PHONY: generate-manpage
generate-manpage: ## generate manpage
@[ -f gitea ] || make backend

1004
assets/codemirror-languages.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,151 @@
#!/usr/bin/env node
import {load as parseYaml} from 'js-yaml';
import {writeFile} from 'node:fs/promises';
// Raw `languages.yml` on the `main` branch of github-linguist; main() downloads and parses
// this file on every run.
const LINGUIST_URL = 'https://raw.githubusercontent.com/github-linguist/linguist/main/lib/linguist/languages.yml';
// Map github-linguist language names (keys) to the names CodeMirror's
// @codemirror/language-data uses (values). Only languages that we want to load with
// extended extension/filename data are listed; everything else falls through to
// language-data's defaults at runtime.
// main() looks every key up in the downloaded languages.yml and reports keys that are not
// found there as a warning. The order of entries here does not affect the output, which is
// sorted by CodeMirror name before it is written.
const linguistToCm: Record<string, string> = {
'C': 'C',
'C++': 'C++',
'C#': 'C#',
'CMake': 'CMake',
'COBOL': 'Cobol',
'CSS': 'CSS',
'Clojure': 'Clojure',
'CoffeeScript': 'CoffeeScript',
'Common Lisp': 'Common Lisp',
'Crystal': 'Crystal',
'Cython': 'Cython',
'D': 'D',
'Dart': 'Dart',
'Diff': 'diff',
'Dockerfile': 'Dockerfile',
'Elm': 'Elm',
'Erlang': 'Erlang',
'F#': 'F#',
'Fortran': 'Fortran',
'Go': 'Go',
'Groovy': 'Groovy',
'HTML': 'HTML',
'Haskell': 'Haskell',
'INI': 'Properties files',
'JSON': 'JSON',
'Java': 'Java',
'JavaScript': 'JavaScript',
'Julia': 'Julia',
'Kotlin': 'Kotlin',
'Less': 'LESS',
'LiveScript': 'LiveScript',
'Lua': 'Lua',
'Markdown': 'Markdown',
'Nginx': 'Nginx',
'OCaml': 'OCaml',
'PHP': 'PHP',
'Pascal': 'Pascal',
'Perl': 'Perl',
'PowerShell': 'PowerShell',
'Protocol Buffer': 'ProtoBuf',
'Pug': 'Pug',
'Puppet': 'Puppet',
'Python': 'Python',
'R': 'R',
'Ruby': 'Ruby',
'Rust': 'Rust',
'SCSS': 'SCSS',
'SQL': 'SQL',
'Sass': 'Sass',
'Scala': 'Scala',
'Scheme': 'Scheme',
'Shell': 'Shell',
'Smalltalk': 'Smalltalk',
'Stylus': 'Stylus',
'Swift': 'Swift',
'SystemVerilog': 'SystemVerilog',
'TOML': 'TOML',
'TSX': 'TSX',
'Tcl': 'Tcl',
'TeX': 'LaTeX',
'TypeScript': 'TypeScript',
'VHDL': 'VHDL',
'Verilog': 'Verilog',
'Vue': 'Vue',
'WebAssembly': 'WebAssembly',
'XML': 'XML',
'YAML': 'YAML',
};
// Per-language extensions to drop. Use only for extensions that would actively collide
// with another language (e.g. .inc claimed by both PHP and C++) or where the syntax is
// genuinely incompatible with the CodeMirror mode (e.g. .csh vs sh).
// Keys are github-linguist language names (the same keys as in linguistToCm); values are
// extensions written without the leading dot, because main() strips the dot before it
// checks an extension against this list.
const excludeExt: Record<string, string[]> = {
'C++': ['inc'],
'INI': ['frm'],
'JavaScript': ['_js', 'bones', 'es', 'es6', 'frag', 'gs', 'jake', 'javascript', 'jsb', 'jscad', 'jsfl', 'jslib', 'jsm', 'jspre', 'jss', 'njs', 'pac', 'sjs', 'ssjs', 'xsjs', 'xsjslib'],
'Lua': ['fcgi'],
'PHP': ['fcgi', 'inc'],
'Perl': ['cgi', 'fcgi'],
'Python': ['cgi', 'fcgi', 'spec'],
'Ruby': ['fcgi', 'spec'],
'Shell': ['cgi', 'csh', 'fcgi'],
'XML': ['inc', 'jsproj', 'tmpl', 'ts', 'tsx'],
};
// The subset of one language entry in linguist's languages.yml that this script reads.
// `extensions` and `filenames` are optional; main() treats a missing list as empty.
// Extensions may carry a leading dot, which main() strips.
type LinguistEntry = {
type: string;
extensions?: string[];
filenames?: string[];
};
// One record of the generated assets/codemirror-languages.json: a CodeMirror language name
// plus the de-duplicated extensions (no leading dot) and filenames collected for it.
type CmLanguage = {
name: string;
extensions: string[];
filenames: string[];
};
/**
 * Regenerate `assets/codemirror-languages.json` from github-linguist.
 *
 * Downloads `languages.yml`, and for every language listed in `linguistToCm` collects its
 * extensions (minus the `excludeExt` entries and multi-dot extensions) and filenames, then
 * writes the result as a JSON array sorted by CodeMirror language name.
 *
 * @throws Error when the download fails or the payload is not a YAML mapping.
 */
async function main(): Promise<void> {
  const res = await fetch(LINGUIST_URL); // eslint-disable-line no-restricted-globals -- node build script, not browser code
  if (!res.ok) throw new Error(`fetch ${LINGUIST_URL} failed: ${res.status}`);

  // Validate the overall shape before indexing into it, so an empty or malformed payload
  // produces a clear error instead of a TypeError further down.
  const parsed: unknown = parseYaml(await res.text());
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`unexpected content at ${LINGUIST_URL}: expected a YAML mapping of languages`);
  }
  const linguist = parsed as Record<string, LinguistEntry>;

  const out: CmLanguage[] = [];
  const missing: string[] = [];
  for (const [linguistName, cmName] of Object.entries(linguistToCm)) {
    const entry = linguist[linguistName];
    if (!entry) {
      missing.push(linguistName);
      continue;
    }
    const exExt = new Set(excludeExt[linguistName]);
    // CodeMirror's matchFilename uses /\.([^.]+)$/ to extract the suffix, so multi-dot
    // extensions like ".cmake.in" cannot match as extensions and are dropped here.
    const extensions = (entry.extensions ?? [])
      .map((e) => e.replace(/^\./, ''))
      .filter((e) => !e.includes('.') && !exExt.has(e));
    const filenames = entry.filenames ?? [];
    out.push({
      name: cmName,
      extensions: Array.from(new Set(extensions)),
      filenames: Array.from(new Set(filenames)),
    });
  }
  if (missing.length) {
    console.warn(`linguist entries not found: ${missing.join(', ')}`);
  }

  // Pin the collation locale so the generated file has the same order on every machine,
  // regardless of the host's default locale.
  out.sort((a, b) => a.name.localeCompare(b.name, 'en'));

  const outPath = new URL('../assets/codemirror-languages.json', import.meta.url);
  await writeFile(outPath, `${JSON.stringify(out, null, 2)}\n`);
  console.info(`wrote ${out.length} languages to ${outPath.pathname}`);
}
// Script entry point: run the generator; on any failure print the error and exit with a
// non-zero status so callers such as `make` notice.
await main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});

View File

@ -43,8 +43,10 @@ export type CodemirrorEditor = {
export type CodemirrorModules = Awaited<ReturnType<typeof importCodemirror>>;
// Shape of one entry of assets/codemirror-languages.json as loaded by importCodemirror().
type LinguistLanguage = {name: string; extensions: string[]; filenames: string[]};
async function importCodemirror() {
const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap] = await Promise.all([
const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguistJson] = await Promise.all([
import('@codemirror/autocomplete'),
import('@codemirror/commands'),
import('@codemirror/language'),
@ -56,8 +58,42 @@ async function importCodemirror() {
import('@lezer/highlight'),
import('@replit/codemirror-indentation-markers'),
import('@replit/codemirror-vscode-keymap'),
import('../../../../assets/codemirror-languages.json', {with: {type: 'json'}}),
]);
return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap};
return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguistLanguages: linguistJson.default as LinguistLanguage[]};
}
// Extra exact filenames, keyed by CodeMirror language name, that buildBaseLanguages()
// appends to the filenames coming from codemirror-languages.json.
const manualFilenames: Record<string, string[]> = {
'Properties files': ['.editorconfig', '.gitconfig', '.npmrc'],
'Python': ['Snakefile'],
};
// Extra extensions (without leading dot), keyed by CodeMirror language name, that
// buildBaseLanguages() appends to the extensions coming from codemirror-languages.json.
const manualExtensions: Record<string, string[]> = {
'Properties files': ['conf'],
};
// Language names that buildBaseLanguages() leaves out of both the override list and the
// fallback list.
const handledByCustomEntry = new Set(['Dockerfile', 'Markdown']);
/** Backslash-escape every regular-expression metacharacter in `s` so it matches literally. */
function escapeRegex(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Build an anchored regular expression that matches exactly one of the given filenames.
 *
 * @param filenames - literal filenames to accept
 * @returns the combined pattern, or `undefined` when the list is empty
 */
function filenameUnion(filenames: string[]): RegExp | undefined {
  if (filenames.length === 0) return undefined;
  const alternatives = filenames.map((name) => escapeRegex(name)).join('|');
  return new RegExp(`^(${alternatives})$`);
}
// Memoised result of buildBaseLanguages(): filled on the first call and returned as-is by
// every later call.
let baseLanguagesCache: LanguageDescription[] | null = null;

/**
 * Build the base list of language descriptions for the editor.
 *
 * Every language in `cm.linguistLanguages` that also exists in `@codemirror/language-data`
 * (and is not listed in `handledByCustomEntry`) is re-described with the extended
 * extension/filename data plus the manual additions. These overrides come first, followed
 * by the remaining stock descriptions.
 *
 * @param cm - the lazily imported CodeMirror modules
 * @returns override descriptions followed by untouched stock descriptions
 */
function buildBaseLanguages(cm: CodemirrorModules): LanguageDescription[] {
  if (baseLanguagesCache) return baseLanguagesCache;

  const stockByName = new Map<string, LanguageDescription>(
    cm.languageData.languages.map((l: LanguageDescription) => [l.name, l]),
  );

  const overrides: LanguageDescription[] = [];
  for (const l of cm.linguistLanguages) {
    const stock = stockByName.get(l.name);
    if (!stock || handledByCustomEntry.has(l.name)) continue;
    overrides.push(cm.language.LanguageDescription.of({
      name: l.name,
      // Keep the stock aliases (e.g. "js", "ts", "sh"): without them the override can only
      // be found by its full name when a language is looked up by name.
      alias: stock.alias,
      extensions: [...l.extensions, ...(manualExtensions[l.name] ?? [])],
      filename: filenameUnion([...l.filenames, ...(manualFilenames[l.name] ?? [])]),
      // Delegate to the stock description so its load() result is shared.
      load: () => stock.load(),
    }));
  }

  const overrideNames = new Set(overrides.map((o) => o.name));
  const fallback = cm.languageData.languages.filter(
    (l: LanguageDescription) => !overrideNames.has(l.name) && !handledByCustomEntry.has(l.name),
  );

  baseLanguagesCache = [...overrides, ...fallback];
  return baseLanguagesCache;
}
function togglePreviewDisplay(previewable: boolean): void {
@ -85,13 +121,20 @@ export async function createCodeEditor(textarea: HTMLTextAreaElement, filenameIn
const previewableExts = new Set(config.previewableExtensions || []);
const lineWrapExts = config.lineWrapExtensions || [];
const cm = await importCodemirror();
const markdown = cm.linguistLanguages.find((l) => l.name === 'Markdown');
const dockerfile = cm.linguistLanguages.find((l) => l.name === 'Dockerfile');
const languageDescriptions: LanguageDescription[] = [
...cm.languageData.languages.filter((l: LanguageDescription) => l.name !== 'Markdown'),
...buildBaseLanguages(cm),
cm.language.LanguageDescription.of({
name: 'Markdown', extensions: ['md', 'markdown', 'mkd'],
name: 'Markdown', extensions: markdown?.extensions ?? ['md', 'markdown', 'mkd'],
load: async () => (await import('@codemirror/lang-markdown')).markdown({codeLanguages: languageDescriptions}),
}),
cm.language.LanguageDescription.of({
name: 'Dockerfile', extensions: dockerfile?.extensions ?? ['dockerfile', 'containerfile'],
filename: /^(Containerfile|Dockerfile)(\..+)?$/i,
load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/dockerfile')).dockerFile)),
}),
cm.language.LanguageDescription.of({
name: 'Elixir', extensions: ['ex', 'exs'],
load: async () => (await import('codemirror-lang-elixir')).elixir(),
@ -105,7 +148,7 @@ export async function createCodeEditor(textarea: HTMLTextAreaElement, filenameIn
load: async () => (await import('@replit/codemirror-lang-svelte')).svelte(),
}),
cm.language.LanguageDescription.of({
name: 'Makefile', filename: /^(GNUm|M|m)akefile$/,
name: 'Makefile', extensions: ['mk', 'mak', 'make'], filename: /^(GNU|BSD)?[Mm]akefile(\..+)?$/,
load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)),
}),
cm.language.LanguageDescription.of({