mirror of
https://github.com/go-gitea/gitea.git
synced 2026-05-10 05:21:54 +02:00
feat(editor): broaden language detection in web code editor
The CodeMirror language registry only ships a narrow set of extensions and filenames per language, so common config and DSL files (.gitconfig, Brewfile, Vagrantfile, Containerfile, Cargo.lock, *.gemspec, *.tcc, Snakefile, etc.) render as plain text in the file editor. Pull authoritative extension/filename data from github-linguist via a new `make generate-codemirror-languages` target, write a curated subset to `assets/codemirror-languages.json`, and wire it into the editor as overrides on top of `@codemirror/language-data`. A small set of manual entries fills the gaps for files that Linguist classifies under separate languages (.editorconfig, .gitconfig, .npmrc) or doesn't list at all (*.conf, Snakefile, Containerfile.*, Dockerfile.*, Makefile.am, BSDmakefile). The derived data structures are memoised at module scope so the work runs once per page session, and the JSON moves into the dynamic `importCodemirror()` chunk so it doesn't bloat the entry bundle. Co-Authored-By: Claude (Opus 4.7) <noreply@anthropic.com>
This commit is contained in:
parent
a5d81d9ce2
commit
b768078175
4
Makefile
4
Makefile
@ -661,6 +661,10 @@ generate-gitignore: ## update gitignore files
|
||||
# Runs generate-images.ts with node from inside tools/, forwarding $(TAGS) as arguments.
# node_modules is an order-only prerequisite: it must exist, but its timestamp never forces a re-run.
generate-images: | node_modules ## generate images
|
||||
cd tools && node generate-images.ts $(TAGS)
|
||||
|
||||
# Regenerates assets/codemirror-languages.json by running tools/generate-codemirror-languages.ts with node.
# The script downloads languages.yml from the github-linguist repository, so this target needs network access.
# node_modules is an order-only prerequisite: it must exist, but its timestamp never forces a re-run.
.PHONY: generate-codemirror-languages
|
||||
generate-codemirror-languages: | node_modules ## refresh assets/codemirror-languages.json from github-linguist
|
||||
node tools/generate-codemirror-languages.ts
|
||||
|
||||
.PHONY: generate-manpage
|
||||
generate-manpage: ## generate manpage
|
||||
@[ -f gitea ] || make backend
|
||||
|
||||
1004
assets/codemirror-languages.json
generated
Normal file
1004
assets/codemirror-languages.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
151
tools/generate-codemirror-languages.ts
Executable file
151
tools/generate-codemirror-languages.ts
Executable file
@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env node
|
||||
import {load as parseYaml} from 'js-yaml';
import {writeFile} from 'node:fs/promises';
import {fileURLToPath} from 'node:url';
|
||||
|
||||
const LINGUIST_URL = 'https://raw.githubusercontent.com/github-linguist/linguist/main/lib/linguist/languages.yml';
|
||||
|
||||
/**
 * Lookup from a github-linguist language name to the name used by CodeMirror's
 * @codemirror/language-data. Only languages that should be loaded with extended
 * extension/filename data appear here; anything else falls through to
 * language-data's defaults at runtime.
 *
 * A bare string means both sides use the same name; a pair is
 * [linguist name, CodeMirror name]. Insertion order is the order listed below.
 */
const linguistToCm: Record<string, string> = Object.fromEntries(
  ([
    'C',
    'C++',
    'C#',
    'CMake',
    ['COBOL', 'Cobol'],
    'CSS',
    'Clojure',
    'CoffeeScript',
    'Common Lisp',
    'Crystal',
    'Cython',
    'D',
    'Dart',
    ['Diff', 'diff'],
    'Dockerfile',
    'Elm',
    'Erlang',
    'F#',
    'Fortran',
    'Go',
    'Groovy',
    'HTML',
    'Haskell',
    ['INI', 'Properties files'],
    'JSON',
    'Java',
    'JavaScript',
    'Julia',
    'Kotlin',
    ['Less', 'LESS'],
    'LiveScript',
    'Lua',
    'Markdown',
    'Nginx',
    'OCaml',
    'PHP',
    'Pascal',
    'Perl',
    'PowerShell',
    ['Protocol Buffer', 'ProtoBuf'],
    'Pug',
    'Puppet',
    'Python',
    'R',
    'Ruby',
    'Rust',
    'SCSS',
    'SQL',
    'Sass',
    'Scala',
    'Scheme',
    'Shell',
    'Smalltalk',
    'Stylus',
    'Swift',
    'SystemVerilog',
    'TOML',
    'TSX',
    'Tcl',
    ['TeX', 'LaTeX'],
    'TypeScript',
    'VHDL',
    'Verilog',
    'Vue',
    'WebAssembly',
    'XML',
    'YAML',
  ] as Array<string | [string, string]>).map(
    (item): [string, string] => (typeof item === 'string' ? [item, item] : item),
  ),
);
|
||||
|
||||
/**
 * Extensions (without the leading dot) to discard per github-linguist language.
 *
 * Reserve this for extensions that would actively collide with another language
 * (e.g. .inc claimed by both PHP and C++) or whose syntax is genuinely
 * incompatible with the CodeMirror mode (e.g. .csh vs sh).
 */
const excludeExt: Record<string, string[]> = {
  'C++': ['inc'],
  'INI': ['frm'],
  'JavaScript': [
    '_js', 'bones', 'es', 'es6', 'frag', 'gs', 'jake',
    'javascript', 'jsb', 'jscad', 'jsfl', 'jslib', 'jsm', 'jspre',
    'jss', 'njs', 'pac', 'sjs', 'ssjs', 'xsjs', 'xsjslib',
  ],
  'Lua': ['fcgi'],
  'PHP': ['fcgi', 'inc'],
  'Perl': ['cgi', 'fcgi'],
  'Python': ['cgi', 'fcgi', 'spec'],
  'Ruby': ['fcgi', 'spec'],
  'Shell': ['cgi', 'csh', 'fcgi'],
  'XML': ['inc', 'jsproj', 'tmpl', 'ts', 'tsx'],
};
|
||||
|
||||
/** The fields of one language entry in Linguist's languages.yml that this script declares. */
type LinguistEntry = {
  /** Linguist's `type` field; declared for completeness, not read by this script. */
  type: string;
  /** Extensions as listed by Linguist, including the leading dot (e.g. ".cmake.in"). */
  extensions?: string[];
  /** Exact filenames Linguist associates with the language. */
  filenames?: string[];
};
|
||||
|
||||
/** One record of the generated assets/codemirror-languages.json array. */
type CmLanguage = {
  /** Language name as used by @codemirror/language-data. */
  name: string;
  /** Single-segment extensions with the leading dot removed. */
  extensions: string[];
  /** Exact filenames that select this language. */
  filenames: string[];
};
|
||||
|
||||
/**
 * Convert one Linguist entry into the record written to the generated JSON.
 *
 * @param cmName - CodeMirror language name to store in the record.
 * @param entry - The Linguist entry for the language.
 * @param excluded - Extensions (without leading dot) to leave out for this language.
 */
function toCmLanguage(cmName: string, entry: LinguistEntry, excluded: ReadonlySet<string>): CmLanguage {
  // CodeMirror's matchFilename uses /\.([^.]+)$/ to extract the suffix, so multi-dot
  // extensions like ".cmake.in" cannot match as extensions and are dropped here.
  const extensions = (entry.extensions ?? [])
    .map((ext) => ext.replace(/^\./, ''))
    .filter((ext) => !ext.includes('.') && !excluded.has(ext));
  return {
    name: cmName,
    extensions: Array.from(new Set(extensions)),
    filenames: Array.from(new Set(entry.filenames ?? [])),
  };
}

/**
 * Download Linguist's languages.yml, keep the languages listed in `linguistToCm`
 * (minus the extensions in `excludeExt`) and write the result, sorted by name, to
 * assets/codemirror-languages.json next to this script's parent directory.
 *
 * Languages listed in `linguistToCm` but absent from the download are reported with
 * a warning and skipped.
 *
 * @throws Error when the download fails or the payload is not a YAML mapping.
 */
async function main(): Promise<void> {
  const res = await fetch(LINGUIST_URL); // eslint-disable-line no-restricted-globals -- node build script, not browser code
  if (!res.ok) throw new Error(`fetch ${LINGUIST_URL} failed: ${res.status}`);

  // Validate the shape before indexing: an empty or non-mapping document would
  // otherwise surface as an opaque TypeError on the first lookup.
  const parsed: unknown = parseYaml(await res.text());
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`unexpected content at ${LINGUIST_URL}: expected a YAML mapping of languages`);
  }
  const linguist = parsed as Record<string, LinguistEntry>;

  const out: CmLanguage[] = [];
  const missing: string[] = [];
  for (const [linguistName, cmName] of Object.entries(linguistToCm)) {
    const entry = linguist[linguistName];
    if (!entry) {
      missing.push(linguistName);
      continue;
    }
    out.push(toCmLanguage(cmName, entry, new Set(excludeExt[linguistName])));
  }

  if (missing.length) {
    console.warn(`linguist entries not found: ${missing.join(', ')}`);
  }

  // Pin the collation locale so the checked-in JSON has the same order regardless of
  // the locale of the machine that runs the generator.
  out.sort((a, b) => a.name.localeCompare(b.name, 'en'));

  // fileURLToPath decodes percent-escapes and yields a native path on every platform,
  // which URL#pathname does not.
  const outPath = fileURLToPath(new URL('../assets/codemirror-languages.json', import.meta.url));
  await writeFile(outPath, `${JSON.stringify(out, null, 2)}\n`);
  console.info(`wrote ${out.length} languages to ${outPath}`);
}
|
||||
|
||||
// Script entry point: run the generator; on any failure print the error and
// terminate with exit status 1.
await main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});
|
||||
@ -43,8 +43,10 @@ export type CodemirrorEditor = {
|
||||
|
||||
export type CodemirrorModules = Awaited<ReturnType<typeof importCodemirror>>;
|
||||
|
||||
/** Shape of one record in assets/codemirror-languages.json as consumed by the editor. */
type LinguistLanguage = {
  name: string;
  extensions: string[];
  filenames: string[];
};
|
||||
|
||||
async function importCodemirror() {
|
||||
const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap] = await Promise.all([
|
||||
const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguistJson] = await Promise.all([
|
||||
import('@codemirror/autocomplete'),
|
||||
import('@codemirror/commands'),
|
||||
import('@codemirror/language'),
|
||||
@ -56,8 +58,42 @@ async function importCodemirror() {
|
||||
import('@lezer/highlight'),
|
||||
import('@replit/codemirror-indentation-markers'),
|
||||
import('@replit/codemirror-vscode-keymap'),
|
||||
import('../../../../assets/codemirror-languages.json', {with: {type: 'json'}}),
|
||||
]);
|
||||
return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap};
|
||||
return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguistLanguages: linguistJson.default as LinguistLanguage[]};
|
||||
}
|
||||
|
||||
// Hand-maintained filenames appended to the generated filename list of a language,
// keyed by CodeMirror language name.
const manualFilenames: Record<string, string[]> = {
  'Properties files': [
    '.editorconfig',
    '.gitconfig',
    '.npmrc',
  ],
  'Python': [
    'Snakefile',
  ],
};

// Hand-maintained extensions (no leading dot) appended to the generated extension
// list of a language, keyed by CodeMirror language name.
const manualExtensions: Record<string, string[]> = {
  'Properties files': [
    'conf',
  ],
};

// Languages left out of the base list (both overrides and fallbacks) because the
// editor registers its own dedicated description for them.
const handledByCustomEntry = new Set<string>([
  'Dockerfile',
  'Markdown',
]);
|
||||
/** Backslash-escape every regular-expression metacharacter in `s` so it matches literally. */
const escapeRegex = (s: string): string => {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
};

/**
 * Build an anchored regex that matches exactly one of the given filenames.
 * Returns undefined for an empty list, so no filename matcher is produced at all.
 */
const filenameUnion = (filenames: string[]): RegExp | undefined => {
  if (!filenames.length) return undefined;
  const alternatives = filenames.map((name) => escapeRegex(name)).join('|');
  return new RegExp(`^(${alternatives})$`);
};
|
||||
|
||||
// Memoised result of buildBaseLanguages: filled on the first call, returned as-is afterwards.
let baseLanguagesCache: LanguageDescription[] | null = null;

/**
 * Build the base language list for the editor: descriptions from
 * `@codemirror/language-data`, with extension/filename matching replaced by the
 * Linguist-derived data (plus the manual additions) wherever such data exists.
 *
 * Languages named in `handledByCustomEntry` are omitted entirely; the caller is
 * expected to register its own descriptions for them. Linguist records whose name
 * has no counterpart in language-data are ignored, since there is nothing to load.
 *
 * The result is computed once and cached at module scope; later calls return the
 * cached array and do not look at `cm` again.
 *
 * @param cm - The lazily imported CodeMirror modules and Linguist records.
 * @returns Override descriptions first, followed by the untouched language-data ones.
 */
function buildBaseLanguages(cm: CodemirrorModules): LanguageDescription[] {
  if (baseLanguagesCache) return baseLanguagesCache;

  const stockByName = new Map<string, LanguageDescription>(
    cm.languageData.languages.map((l: LanguageDescription): [string, LanguageDescription] => [l.name, l]),
  );

  const overrides: LanguageDescription[] = [];
  for (const l of cm.linguistLanguages) {
    const stock = stockByName.get(l.name);
    if (!stock || handledByCustomEntry.has(l.name)) continue;
    overrides.push(cm.language.LanguageDescription.of({
      name: l.name,
      // Carry the stock aliases over: lookups by language name (e.g. a Markdown
      // code fence tagged "js" or "py") match on name and alias only, so dropping
      // them would make the overridden languages unreachable by their short names.
      alias: stock.alias,
      extensions: [...l.extensions, ...(manualExtensions[l.name] ?? [])],
      filename: filenameUnion([...l.filenames, ...(manualFilenames[l.name] ?? [])]),
      // Delegate to the stock description so its loader (and its own caching) is reused.
      load: () => stock.load(),
    }));
  }

  const overrideNames = new Set(overrides.map((o) => o.name));
  const fallback = cm.languageData.languages.filter(
    (l: LanguageDescription) => !overrideNames.has(l.name) && !handledByCustomEntry.has(l.name),
  );

  baseLanguagesCache = [...overrides, ...fallback];
  return baseLanguagesCache;
}
|
||||
|
||||
function togglePreviewDisplay(previewable: boolean): void {
|
||||
@ -85,13 +121,20 @@ export async function createCodeEditor(textarea: HTMLTextAreaElement, filenameIn
|
||||
const previewableExts = new Set(config.previewableExtensions || []);
|
||||
const lineWrapExts = config.lineWrapExtensions || [];
|
||||
const cm = await importCodemirror();
|
||||
const markdown = cm.linguistLanguages.find((l) => l.name === 'Markdown');
|
||||
const dockerfile = cm.linguistLanguages.find((l) => l.name === 'Dockerfile');
|
||||
|
||||
const languageDescriptions: LanguageDescription[] = [
|
||||
...cm.languageData.languages.filter((l: LanguageDescription) => l.name !== 'Markdown'),
|
||||
...buildBaseLanguages(cm),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Markdown', extensions: ['md', 'markdown', 'mkd'],
|
||||
name: 'Markdown', extensions: markdown?.extensions ?? ['md', 'markdown', 'mkd'],
|
||||
load: async () => (await import('@codemirror/lang-markdown')).markdown({codeLanguages: languageDescriptions}),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Dockerfile', extensions: dockerfile?.extensions ?? ['dockerfile', 'containerfile'],
|
||||
filename: /^(Containerfile|Dockerfile)(\..+)?$/i,
|
||||
load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/dockerfile')).dockerFile)),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Elixir', extensions: ['ex', 'exs'],
|
||||
load: async () => (await import('codemirror-lang-elixir')).elixir(),
|
||||
@ -105,7 +148,7 @@ export async function createCodeEditor(textarea: HTMLTextAreaElement, filenameIn
|
||||
load: async () => (await import('@replit/codemirror-lang-svelte')).svelte(),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Makefile', filename: /^(GNUm|M|m)akefile$/,
|
||||
name: 'Makefile', extensions: ['mk', 'mak', 'make'], filename: /^(GNU|BSD)?[Mm]akefile(\..+)?$/,
|
||||
load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user