mirror of
https://github.com/go-gitea/gitea.git
synced 2026-05-11 02:54:05 +02:00
feat(editor): broaden language detection in web code editor (#37619)
Use https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml to substantially improve syntax highlighting in CodeMirror. The file is generated on demand only. Signed-off-by: silverwind <me@silverwind.io> Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com>
This commit is contained in:
parent
0a3aaeafe7
commit
a61598884f
4
Makefile
4
Makefile
@ -661,6 +661,10 @@ generate-gitignore: ## update gitignore files
|
||||
generate-images: | node_modules ## generate images
|
||||
cd tools && node generate-images.ts $(TAGS)
|
||||
|
||||
.PHONY: generate-codemirror-languages
|
||||
generate-codemirror-languages: | node_modules ## generate codemirror languages
|
||||
node tools/generate-codemirror-languages.ts
|
||||
|
||||
.PHONY: generate-manpage
|
||||
generate-manpage: ## generate manpage
|
||||
@[ -f gitea ] || make backend
|
||||
|
||||
1277
assets/codemirror-languages.json
generated
Normal file
1277
assets/codemirror-languages.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@ -570,8 +570,6 @@ export default defineConfig([
|
||||
'no-redeclare': [0], // must be disabled for typescript overloads
|
||||
'no-regex-spaces': [2],
|
||||
'no-restricted-exports': [0],
|
||||
'no-restricted-globals': [2, ...restrictedGlobals],
|
||||
'no-restricted-properties': [2, ...restrictedProperties],
|
||||
'no-restricted-imports': [2, {paths: [
|
||||
{name: 'jquery', message: 'Use the global $ instead', allowTypeImports: true},
|
||||
]}],
|
||||
@ -1022,5 +1020,9 @@ export default defineConfig([
|
||||
{
|
||||
files: ['web_src/**/*'],
|
||||
languageOptions: {globals: {...globals.browser, ...globals.jquery}},
|
||||
rules: {
|
||||
'no-restricted-globals': [2, ...restrictedGlobals],
|
||||
'no-restricted-properties': [2, ...restrictedProperties],
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
@ -192,12 +192,13 @@ func isViteDevRequest(req *http.Request) bool {
|
||||
|
||||
// Vite uses a path relative to project root and adds "?import" to non-JS/CSS asset imports:
|
||||
// - {WebSite}/public/assets/... (e.g. SVG icons from "{RepoRoot}/public/assets/img/svg/")
|
||||
// - {WebSite}/assets/emoji.json: it is an exception for the frontend assets, it is imported by JS code, but:
|
||||
// - {WebSite}/assets/<file>.json: exception for frontend-imported repo-root assets:
|
||||
// - KEEP IN MIND: all static frontend assets are served from "{AssetFS}/assets" to "{WebSite}/assets" by Gitea Web Server
|
||||
// - "{AssetFS}" is a layered filesystem from "{RepoRoot}/public" or embedded assets, and user's custom files in "{CustomPath}/public"
|
||||
// - "{RepoRoot}/assets/emoji.json" just happens to have the dir name "assets", it is not related to frontend assets
|
||||
// - "{RepoRoot}/assets/*.json" just happens to live under the dir name "assets"; it is not related to frontend assets
|
||||
// - BAD DESIGN: indeed it is a "conflicted and polluted name" sample
|
||||
if path == "/assets/emoji.json" {
|
||||
switch path {
|
||||
case "/assets/emoji.json", "/assets/codemirror-languages.json":
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
95
tools/generate-codemirror-languages.ts
Executable file
95
tools/generate-codemirror-languages.ts
Executable file
@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env node
|
||||
import {load as parseYaml} from 'js-yaml';
|
||||
import {writeFile} from 'node:fs/promises';
|
||||
import {languages as cmLanguages} from '@codemirror/language-data';
|
||||
|
||||
// Source of truth for the language metadata: Linguist's languages.yml on the main branch.
const linguistUrl = 'https://raw.githubusercontent.com/github-linguist/linguist/main/lib/linguist/languages.yml';

// Linguist name -> CodeMirror name; consulted before the case-insensitive alias lookup.
const renames: Record<string, string> = {
  'Protocol Buffer': 'ProtoBuf',
};

// Languages whose entry is constructed manually in the runtime; skip during generation.
const skipNames = new Set(['Dockerfile', 'Markdown']);

// Extensions claimed by several unrelated languages with no good default; strip globally.
const ambiguousExt = new Set(['cgi', 'fcgi', 'inc']);

// Per-language drops, keyed by the Linguist name: non-text formats (.frm = binary VB6 forms)
// or cases where Linguist's primary owner conflicts with a more specialised CodeMirror
// mode (.spec → RPM Spec).
const excludeExt: Record<string, string[]> = {
  'INI': ['frm'],
  'Python': ['spec'],
  'Ruby': ['spec'],
};

// Per-CM-language additions for filenames Linguist classifies as separate languages
// (.editorconfig, .gitconfig, .npmrc) or omits entirely (Snakefile).
const extraFilenames: Record<string, string[]> = {
  'Properties files': ['.editorconfig', '.gitconfig', '.npmrc'],
  'Python': ['Snakefile'],
};

// Per-CM-language additions widely used in practice but absent from Linguist's list.
const extraExtensions: Record<string, string[]> = {
  'Properties files': ['conf'],
};

// The subset of a Linguist languages.yml entry that this script reads.
type LinguistEntry = {
  type: string;
  extensions?: string[];
  filenames?: string[];
};

// One record of the generated JSON file.
type CmLanguage = {
  name: string;
  extensions: string[];
  filenames: string[];
};

const response = await fetch(linguistUrl);
if (!response.ok) throw new Error(`fetch ${linguistUrl} failed: ${response.status}`);
const linguist = parseYaml(await response.text()) as Record<string, LinguistEntry>;

// Lower-cased CM language name or alias -> CM language name.
const cmByAlias = new Map<string, string>();
// Map of extension -> the CM language that originally owns it. Used to prevent Linguist
// from broadening one language's extension claim into another's territory (e.g. Linguist's
// PLSQL lists .sql, but CM's SQL is the canonical owner).
const cmOriginalExtOwner = new Map<string, string>();
for (const {name, alias, extensions} of cmLanguages) {
  for (const key of [name, ...alias]) cmByAlias.set(key.toLowerCase(), name);
  for (const ext of extensions) {
    // The first CM language listing an extension keeps ownership of it.
    if (!cmOriginalExtOwner.has(ext)) cmOriginalExtOwner.set(ext, name);
  }
}

const out: CmLanguage[] = [];
const seen = new Set<string>();
for (const [linguistName, entry] of Object.entries(linguist)) {
  const cmName = renames[linguistName] ?? cmByAlias.get(linguistName.toLowerCase());
  if (!cmName) continue;
  // Multiple Linguist entries can alias to the same CM language (e.g. JSON5 → JSON);
  // only the first one encountered is used.
  if (skipNames.has(cmName) || seen.has(cmName)) continue;
  seen.add(cmName);

  const dropped = new Set(excludeExt[linguistName]);
  const extensions: string[] = [];
  for (const rawExt of entry.extensions ?? []) {
    const ext = rawExt.replace(/^\./, '');
    // CodeMirror's matchFilename uses /\.([^.]+)$/, so multi-dot extensions like
    // ".cmake.in" can't match as extensions and are dropped here.
    if (ext.includes('.') || ambiguousExt.has(ext) || dropped.has(ext)) continue;
    // Leave extensions that another CM language already owns with that language.
    const owner = cmOriginalExtOwner.get(ext);
    if (owner && owner !== cmName) continue;
    extensions.push(ext);
  }
  extensions.push(...(extraExtensions[cmName] ?? []));

  const filenames = (entry.filenames ?? []).concat(extraFilenames[cmName] ?? []);
  out.push({name: cmName, extensions, filenames});
}

out.sort((a, b) => a.name.localeCompare(b.name));

const outPath = new URL('../assets/codemirror-languages.json', import.meta.url);
await writeFile(outPath, `${JSON.stringify(out, null, 2)}\n`);
console.info(`wrote ${out.length} languages to ${outPath.pathname}`);
|
||||
54
web_src/js/modules/codeeditor/main.test.ts
Normal file
54
web_src/js/modules/codeeditor/main.test.ts
Normal file
@ -0,0 +1,54 @@
|
||||
import {buildLanguageDescriptions, importCodemirror} from './main.ts';
|
||||
|
||||
test('matchFilename — language detection covers extended rules', async () => {
  const cm = await importCodemirror();
  const descriptions = buildLanguageDescriptions(cm);
  // Name of the language matched for a filename, or undefined when nothing matches.
  const detect = (filename: string) => {
    return cm.language.LanguageDescription.matchFilename(descriptions, filename)?.name;
  };
  const expectAll = (cases: Array<[string, string]>) => {
    for (const [filename, language] of cases) expect(detect(filename)).toBe(language);
  };

  expectAll([
    // Linguist-supplied filenames + extensions
    ['.bashrc', 'Shell'],
    ['PKGBUILD', 'Shell'],
    ['foo.zsh', 'Shell'],
    ['Cargo.lock', 'TOML'],
    ['Gemfile', 'Ruby'],
    ['foo.gemspec', 'Ruby'],
    ['foo.psgi', 'Perl'],
    ['foo.pyi', 'Python'],
    ['foo.webmanifest', 'JSON'],
    ['foo.tcc', 'C++'],

    // Script-side extras (extraFilenames / extraExtensions)
    ['.editorconfig', 'Properties files'],
    ['foo.conf', 'Properties files'],
    ['Snakefile', 'Python'],

    // Custom Gitea entries override language-data
    ['Containerfile.test', 'Dockerfile'],
    ['Dockerfile.dev', 'Dockerfile'],
    ['Makefile.am', 'Makefile'],
    ['foo.mk', 'Makefile'],
    ['.env.local', 'Dotenv'],
    ['foo.json5', 'JSON5'],
    ['foo.mdown', 'Markdown'],

    // Filename regex wins over extension match
    ['nginx.conf', 'Nginx'],

    // .spec routes to RPM Spec via excludeExt redirect
    ['foo.spec', 'RPM Spec'],

    // CM original ownership preserved against Linguist's broader claims (.sql is SQL,
    // not PLSQL, even though Linguist's PLSQL extension list includes it).
    ['foo.sql', 'SQL'],
    ['foo.h', 'C'],
    ['foo.mm', 'Objective-C++'],
  ]);

  // Globally ambiguous extensions fall through to plain text
  for (const filename of ['foo.cgi', 'foo.inc']) {
    expect(detect(filename)).toBeUndefined();
  }

  // Smoke: existing language-data entries still resolve
  expectAll([
    ['foo.go', 'Go'],
    ['foo.tsx', 'TSX'],
  ]);
});
|
||||
@ -41,10 +41,12 @@ export type CodemirrorEditor = {
|
||||
};
|
||||
};
|
||||
|
||||
type LinguistLanguage = {name: string; extensions: string[]; filenames: string[]};
|
||||
|
||||
export type CodemirrorModules = Awaited<ReturnType<typeof importCodemirror>>;
|
||||
|
||||
async function importCodemirror() {
|
||||
const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap] = await Promise.all([
|
||||
export async function importCodemirror() {
|
||||
const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguist] = await Promise.all([
|
||||
import('@codemirror/autocomplete'),
|
||||
import('@codemirror/commands'),
|
||||
import('@codemirror/language'),
|
||||
@ -56,8 +58,77 @@ async function importCodemirror() {
|
||||
import('@lezer/highlight'),
|
||||
import('@replit/codemirror-indentation-markers'),
|
||||
import('@replit/codemirror-vscode-keymap'),
|
||||
import('../../../../assets/codemirror-languages.json'),
|
||||
]);
|
||||
return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap};
|
||||
return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguistLanguages: linguist.default as LinguistLanguage[]};
|
||||
}
|
||||
|
||||
// Backslash-escape every regex metacharacter in `s` so the result matches `s` literally
// when embedded in a RegExp source string.
const escapeRegex = (s: string): string => {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, '\\$&');
};
|
||||
// Build a single anchored regex that matches any one of `filenames` exactly (each name is
// escaped first); returns undefined when the list is empty.
const filenameUnion = (filenames: string[]): RegExp | undefined => {
  if (!filenames.length) return undefined;
  const alternatives = filenames.map(escapeRegex).join('|');
  return new RegExp(`^(${alternatives})$`);
};
|
||||
|
||||
/**
 * Assemble the language descriptions used for filename-based language detection.
 *
 * The returned list starts with the entries from `buildBaseLanguages(cm)` and is followed by
 * the hand-written descriptions below (Markdown, Dockerfile, Elixir, Nix, Svelte, Makefile,
 * Dotenv, JSON5). The Markdown loader receives the list itself as `codeLanguages`.
 */
export function buildLanguageDescriptions(cm: CodemirrorModules): LanguageDescription[] {
  const lang = cm.language;

  // Makefile and Dotenv both use the legacy shell stream mode.
  const loadShellMode = async () => {
    const {shell} = await import('@codemirror/legacy-modes/mode/shell');
    return new lang.LanguageSupport(lang.StreamLanguage.define(shell));
  };
  const loadDockerfileMode = async () => {
    const {dockerFile} = await import('@codemirror/legacy-modes/mode/dockerfile');
    return new lang.LanguageSupport(lang.StreamLanguage.define(dockerFile));
  };

  const list: LanguageDescription[] = [...buildBaseLanguages(cm)];
  list.push(
    lang.LanguageDescription.of({
      name: 'Markdown',
      extensions: ['md', 'markdown', 'mkd', 'mdown', 'mdwn', 'mkdn', 'mkdown'],
      load: async () => {
        const {markdown} = await import('@codemirror/lang-markdown');
        return markdown({codeLanguages: list});
      },
    }),
    lang.LanguageDescription.of({
      name: 'Dockerfile',
      extensions: ['dockerfile', 'containerfile'],
      filename: /^(Containerfile|Dockerfile)(\..+)?$/i,
      load: loadDockerfileMode,
    }),
    lang.LanguageDescription.of({
      name: 'Elixir',
      extensions: ['ex', 'exs'],
      load: async () => {
        const {elixir} = await import('codemirror-lang-elixir');
        return elixir();
      },
    }),
    lang.LanguageDescription.of({
      name: 'Nix',
      extensions: ['nix'],
      load: async () => {
        const {nix} = await import('@replit/codemirror-lang-nix');
        return nix();
      },
    }),
    lang.LanguageDescription.of({
      name: 'Svelte',
      extensions: ['svelte'],
      load: async () => {
        const {svelte} = await import('@replit/codemirror-lang-svelte');
        return svelte();
      },
    }),
    lang.LanguageDescription.of({
      name: 'Makefile',
      extensions: ['mk', 'mak', 'make'],
      filename: /^(GNU|BSD)?[Mm]akefile(\..+)?$/,
      load: loadShellMode,
    }),
    lang.LanguageDescription.of({
      name: 'Dotenv',
      extensions: ['env'],
      filename: /^\.env(\..*)?$/,
      load: loadShellMode,
    }),
    lang.LanguageDescription.of({
      name: 'JSON5',
      extensions: ['json5', 'jsonc'],
      load: async () => {
        const {json} = await import('@codemirror/lang-json');
        return json();
      },
    }),
  );
  return list;
}
|
||||
|
||||
// Languages that the JSON omits because they're constructed manually above; they are also
// kept out of the language-data fallback list.
const customNames = new Set(['Dockerfile', 'Markdown']);

// Result of the first buildBaseLanguages call; later calls return it unchanged.
let baseLanguagesCache: LanguageDescription[] | null = null;

/**
 * Build the base language list from the generated Linguist data and `@codemirror/language-data`.
 *
 * For every entry in `cm.linguistLanguages` whose name also exists in
 * `cm.languageData.languages`, a new description is created from the entry's extensions and
 * filenames, reusing the language-data loader of the same name. Language-data entries that
 * were not overridden this way, and are not in `customNames`, are appended unchanged.
 *
 * The result is cached at module level, so the `cm` argument only matters on the first call.
 */
function buildBaseLanguages(cm: CodemirrorModules): LanguageDescription[] {
  if (baseLanguagesCache !== null) return baseLanguagesCache;

  // Loader of each language-data entry, keyed by language name and bound to its description.
  const loaders = new Map<string, LanguageDescription['load']>();
  for (const desc of cm.languageData.languages) {
    loaders.set(desc.name, desc.load.bind(desc));
  }

  const overrides: LanguageDescription[] = [];
  const overrideNames = new Set<string>();
  for (const entry of cm.linguistLanguages) {
    const load = loaders.get(entry.name);
    if (!load) continue; // no language-data entry of that name, nothing to load
    overrideNames.add(entry.name);
    overrides.push(cm.language.LanguageDescription.of({
      name: entry.name,
      extensions: entry.extensions,
      filename: filenameUnion(entry.filenames),
      load,
    }));
  }

  const fallback = cm.languageData.languages.filter((desc: LanguageDescription) => {
    return !(overrideNames.has(desc.name) || customNames.has(desc.name));
  });

  baseLanguagesCache = [...overrides, ...fallback];
  return baseLanguagesCache;
}
|
||||
|
||||
function togglePreviewDisplay(previewable: boolean): void {
|
||||
@ -85,38 +156,7 @@ export async function createCodeEditor(textarea: HTMLTextAreaElement, filenameIn
|
||||
const previewableExts = new Set(config.previewableExtensions || []);
|
||||
const lineWrapExts = config.lineWrapExtensions || [];
|
||||
const cm = await importCodemirror();
|
||||
|
||||
const languageDescriptions: LanguageDescription[] = [
|
||||
...cm.languageData.languages.filter((l: LanguageDescription) => l.name !== 'Markdown'),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Markdown', extensions: ['md', 'markdown', 'mkd'],
|
||||
load: async () => (await import('@codemirror/lang-markdown')).markdown({codeLanguages: languageDescriptions}),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Elixir', extensions: ['ex', 'exs'],
|
||||
load: async () => (await import('codemirror-lang-elixir')).elixir(),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Nix', extensions: ['nix'],
|
||||
load: async () => (await import('@replit/codemirror-lang-nix')).nix(),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Svelte', extensions: ['svelte'],
|
||||
load: async () => (await import('@replit/codemirror-lang-svelte')).svelte(),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Makefile', filename: /^(GNUm|M|m)akefile$/,
|
||||
load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'Dotenv', extensions: ['env'], filename: /^\.env(\..*)?$/,
|
||||
load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)),
|
||||
}),
|
||||
cm.language.LanguageDescription.of({
|
||||
name: 'JSON5', extensions: ['json5', 'jsonc'],
|
||||
load: async () => (await import('@codemirror/lang-json')).json(),
|
||||
}),
|
||||
];
|
||||
const languageDescriptions = buildLanguageDescriptions(cm);
|
||||
const matchedLang = cm.language.LanguageDescription.matchFilename(languageDescriptions, config.filename);
|
||||
|
||||
const container = document.createElement('div');
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user