0
0
mirror of https://github.com/go-gitea/gitea.git synced 2026-05-10 09:41:52 +02:00

refactor(editor): auto-match against @codemirror/language-data

Drop the hand-curated 67-name list; intersect Linguist's full set
against language-data's exported names instead. A small renames map
covers the six places where the two registries disagree on naming.
Adds 34 niche languages we hadn't curated (Cypher, Eiffel, Forth,
XQuery, etc.) at +4 KB on the JSON.

Co-Authored-By: Claude (Opus 4.7) <noreply@anthropic.com>
This commit is contained in:
silverwind 2026-05-09 06:58:30 +02:00
parent 023a11a3f9
commit 5764fc2546
No known key found for this signature in database
GPG Key ID: 2E62B41C93869443
2 changed files with 297 additions and 26 deletions

View File

@ -1,4 +1,28 @@
[
{
"name": "APL",
"extensions": [
"apl",
"dyalog"
],
"filenames": []
},
{
"name": "ASN.1",
"extensions": [
"asn",
"asn1"
],
"filenames": []
},
{
"name": "Brainfuck",
"extensions": [
"b",
"bf"
],
"filenames": []
},
{
"name": "C",
"extensions": [
@ -108,6 +132,13 @@
],
"filenames": []
},
{
"name": "CQL",
"extensions": [
"cql"
],
"filenames": []
},
{
"name": "Crystal",
"extensions": [
@ -122,6 +153,14 @@
],
"filenames": []
},
{
"name": "Cypher",
"extensions": [
"cyp",
"cypher"
],
"filenames": []
},
{
"name": "Cython",
"extensions": [
@ -165,6 +204,45 @@
"Dockerfile"
]
},
{
"name": "Dylan",
"extensions": [
"dylan",
"dyl",
"intr",
"lid"
],
"filenames": []
},
{
"name": "EBNF",
"extensions": [
"ebnf"
],
"filenames": []
},
{
"name": "ECL",
"extensions": [
"ecl",
"eclxml"
],
"filenames": []
},
{
"name": "edn",
"extensions": [
"edn"
],
"filenames": []
},
{
"name": "Eiffel",
"extensions": [
"e"
],
"filenames": []
},
{
"name": "Elm",
"extensions": [
@ -199,6 +277,30 @@
],
"filenames": []
},
{
"name": "Factor",
"extensions": [
"factor"
],
"filenames": [
".factor-boot-rc",
".factor-rc"
]
},
{
"name": "Forth",
"extensions": [
"fth",
"4th",
"f",
"for",
"forth",
"fr",
"frt",
"fs"
],
"filenames": []
},
{
"name": "Fortran",
"extensions": [
@ -209,6 +311,14 @@
],
"filenames": []
},
{
"name": "Gherkin",
"extensions": [
"feature",
"story"
],
"filenames": []
},
{
"name": "Go",
"extensions": [
@ -237,6 +347,14 @@
],
"filenames": []
},
{
"name": "Haxe",
"extensions": [
"hx",
"hxsl"
],
"filenames": []
},
{
"name": "HTML",
"extensions": [
@ -249,6 +367,28 @@
],
"filenames": []
},
{
"name": "HTTP",
"extensions": [
"http"
],
"filenames": []
},
{
"name": "HXML",
"extensions": [
"hxml"
],
"filenames": []
},
{
"name": "IDL",
"extensions": [
"pro",
"dlm"
],
"filenames": []
},
{
"name": "Java",
"extensions": [
@ -270,6 +410,15 @@
"Jakefile"
]
},
{
"name": "Jinja",
"extensions": [
"jinja",
"j2",
"jinja2"
],
"filenames": []
},
{
"name": "JSON",
"extensions": [
@ -357,6 +506,13 @@
],
"filenames": []
},
{
"name": "Liquid",
"extensions": [
"liquid"
],
"filenames": []
},
{
"name": "LiveScript",
"extensions": [
@ -401,6 +557,13 @@
"contents.lr"
]
},
{
"name": "Modelica",
"extensions": [
"mo"
],
"filenames": []
},
{
"name": "Nginx",
"extensions": [
@ -412,6 +575,29 @@
"nginx.conf"
]
},
{
"name": "NSIS",
"extensions": [
"nsi",
"nsh"
],
"filenames": []
},
{
"name": "Objective-C",
"extensions": [
"m",
"h"
],
"filenames": []
},
{
"name": "Objective-C++",
"extensions": [
"mm"
],
"filenames": []
},
{
"name": "OCaml",
"extensions": [
@ -425,6 +611,13 @@
],
"filenames": []
},
{
"name": "Oz",
"extensions": [
"oz"
],
"filenames": []
},
{
"name": "Pascal",
"extensions": [
@ -478,6 +671,28 @@
"Phakefile"
]
},
{
"name": "PLSQL",
"extensions": [
"pls",
"bdy",
"ddl",
"fnc",
"pck",
"pkb",
"pks",
"plb",
"plsql",
"prc",
"spc",
"sql",
"tpb",
"tps",
"trg",
"vw"
],
"filenames": []
},
{
"name": "PowerShell",
"extensions": [
@ -574,6 +789,13 @@
"expr-dist"
]
},
{
"name": "RPM Spec",
"extensions": [
"spec"
],
"filenames": []
},
{
"name": "Ruby",
"extensions": [
@ -631,6 +853,13 @@
],
"filenames": []
},
{
"name": "SAS",
"extensions": [
"sas"
],
"filenames": []
},
{
"name": "Sass",
"extensions": [
@ -727,6 +956,13 @@
"zshrc"
]
},
{
"name": "Sieve",
"extensions": [
"sieve"
],
"filenames": []
},
{
"name": "Smalltalk",
"extensions": [
@ -735,6 +971,14 @@
],
"filenames": []
},
{
"name": "SPARQL",
"extensions": [
"sparql",
"rq"
],
"filenames": []
},
{
"name": "SQL",
"extensions": [
@ -749,6 +993,13 @@
],
"filenames": []
},
{
"name": "Squirrel",
"extensions": [
"nut"
],
"filenames": []
},
{
"name": "Stylus",
"extensions": [
@ -786,6 +1037,13 @@
"starfield"
]
},
{
"name": "Textile",
"extensions": [
"textile"
],
"filenames": []
},
{
"name": "TOML",
"extensions": [
@ -810,6 +1068,13 @@
],
"filenames": []
},
{
"name": "Turtle",
"extensions": [
"ttl"
],
"filenames": []
},
{
"name": "TypeScript",
"extensions": [
@ -819,6 +1084,13 @@
],
"filenames": []
},
{
"name": "VBScript",
"extensions": [
"vbs"
],
"filenames": []
},
{
"name": "Verilog",
"extensions": [
@ -978,6 +1250,17 @@
"packages.config"
]
},
{
"name": "XQuery",
"extensions": [
"xquery",
"xq",
"xql",
"xqm",
"xqy"
],
"filenames": []
},
{
"name": "YAML",
"extensions": [

View File

@ -1,23 +1,19 @@
#!/usr/bin/env node
import {load as parseYaml} from 'js-yaml';
import {writeFile} from 'node:fs/promises';
import {languages as cmLanguages} from '@codemirror/language-data';
const linguistUrl = 'https://raw.githubusercontent.com/github-linguist/linguist/main/lib/linguist/languages.yml';
// Languages to extract from github-linguist. A bare string means the linguist name
// matches CodeMirror's @codemirror/language-data name; a tuple is [linguist, cm] when
// they differ. Anything not listed falls through to language-data's defaults at runtime.
const languages: Array<string | [string, string]> = [
'C', 'C++', 'C#', 'CMake', ['COBOL', 'Cobol'], 'CSS', 'Clojure', 'CoffeeScript',
'Common Lisp', 'Crystal', 'Cython', 'D', 'Dart', ['Diff', 'diff'], 'Dockerfile',
'Elm', 'Erlang', 'F#', 'Fortran', 'Go', 'Groovy', 'HTML', 'Haskell',
['INI', 'Properties files'], 'JSON', 'Java', 'JavaScript', 'Julia', 'Kotlin',
['Less', 'LESS'], 'LiveScript', 'Lua', 'Markdown', 'Nginx', 'OCaml', 'PHP', 'Pascal',
'Perl', 'PowerShell', ['Protocol Buffer', 'ProtoBuf'], 'Pug', 'Puppet', 'Python', 'R',
'Ruby', 'Rust', 'SCSS', 'SQL', 'Sass', 'Scala', 'Scheme', 'Shell', 'Smalltalk',
'Stylus', 'Swift', 'SystemVerilog', 'TOML', 'TSX', 'Tcl', ['TeX', 'LaTeX'],
'TypeScript', 'VHDL', 'Verilog', 'Vue', 'WebAssembly', 'XML', 'YAML',
];
// Linguist names that don't match the corresponding @codemirror/language-data name.
// Keys are github-linguist language names; values are the names exported by
// @codemirror/language-data. A Linguist name with no entry here is looked up unchanged,
// and any language whose resolved name is not exported by language-data is skipped.
const renames: Record<string, string> = {
'COBOL': 'Cobol',
'Diff': 'diff',
'INI': 'Properties files',
'Less': 'LESS',
'Protocol Buffer': 'ProtoBuf',
'TeX': 'LaTeX',
};
// Per-language extensions to drop. Use only for extensions that would actively collide
// with another language (e.g. .inc claimed by both PHP and C++) or where the syntax is
@ -52,15 +48,11 @@ async function main() {
if (!res.ok) throw new Error(`fetch ${linguistUrl} failed: ${res.status}`);
const linguist = parseYaml(await res.text()) as Record<string, LinguistEntry>;
const cmNames = new Set(cmLanguages.map((l) => l.name));
const out: CmLanguage[] = [];
const missing: string[] = [];
for (const lang of languages) {
const [linguistName, cmName] = typeof lang === 'string' ? [lang, lang] : lang;
const entry = linguist[linguistName];
if (!entry) {
missing.push(linguistName);
continue;
}
for (const [linguistName, entry] of Object.entries(linguist)) {
const cmName = renames[linguistName] ?? linguistName;
if (!cmNames.has(cmName)) continue;
const exExt = new Set(excludeExt[linguistName]);
// CodeMirror's matchFilename uses /\.([^.]+)$/ to extract the suffix, so multi-dot
// extensions like ".cmake.in" cannot match as extensions and are dropped here.
@ -75,10 +67,6 @@ async function main() {
});
}
if (missing.length) {
console.warn(`linguist entries not found: ${missing.join(', ')}`);
}
out.sort((a, b) => a.name.localeCompare(b.name));
const outPath = new URL('../assets/codemirror-languages.json', import.meta.url);