From a61598884f9d8b686ef56df407cbe6a709128640 Mon Sep 17 00:00:00 2001 From: silverwind Date: Sun, 10 May 2026 06:51:46 +0200 Subject: [PATCH] feat(editor): broaden language detection in web code editor (#37619) Use https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml to substantially improve syntax highlighting in Codemirror. File is generated on-demand only. Signed-off-by: silverwind Co-authored-by: Claude (Opus 4.7) --- Makefile | 4 + assets/codemirror-languages.json | 1277 ++++++++++++++++++++ eslint.config.ts | 6 +- modules/public/vitedev.go | 7 +- tools/generate-codemirror-languages.ts | 95 ++ web_src/js/modules/codeeditor/main.test.ts | 54 + web_src/js/modules/codeeditor/main.ts | 110 +- 7 files changed, 1513 insertions(+), 40 deletions(-) create mode 100644 assets/codemirror-languages.json create mode 100755 tools/generate-codemirror-languages.ts create mode 100644 web_src/js/modules/codeeditor/main.test.ts diff --git a/Makefile b/Makefile index 27b2c30295..bc343eee01 100644 --- a/Makefile +++ b/Makefile @@ -661,6 +661,10 @@ generate-gitignore: ## update gitignore files generate-images: | node_modules ## generate images cd tools && node generate-images.ts $(TAGS) +.PHONY: generate-codemirror-languages +generate-codemirror-languages: | node_modules ## generate codemirror languages + node tools/generate-codemirror-languages.ts + .PHONY: generate-manpage generate-manpage: ## generate manpage @[ -f gitea ] || make backend diff --git a/assets/codemirror-languages.json b/assets/codemirror-languages.json new file mode 100644 index 0000000000..573e7c493b --- /dev/null +++ b/assets/codemirror-languages.json @@ -0,0 +1,1277 @@ +[ + { + "name": "APL", + "extensions": [ + "apl", + "dyalog" + ], + "filenames": [] + }, + { + "name": "ASN.1", + "extensions": [ + "asn", + "asn1" + ], + "filenames": [] + }, + { + "name": "Brainfuck", + "extensions": [ + "b", + "bf" + ], + "filenames": [] + }, + { + "name": "C", + "extensions": [ + "c", + "cats", +
"h", + "idc" + ], + "filenames": [] + }, + { + "name": "C#", + "extensions": [ + "cs", + "cake", + "csx", + "linq" + ], + "filenames": [] + }, + { + "name": "C++", + "extensions": [ + "cpp", + "c++", + "cc", + "cp", + "cppm", + "cxx", + "h++", + "hh", + "hpp", + "hxx", + "inl", + "ipp", + "ixx", + "re", + "tcc", + "tpp", + "txx" + ], + "filenames": [] + }, + { + "name": "Clojure", + "extensions": [ + "clj", + "bb", + "boot", + "cl2", + "cljc", + "cljscm", + "cljx", + "hic" + ], + "filenames": [ + "riemann.config" + ] + }, + { + "name": "CMake", + "extensions": [ + "cmake" + ], + "filenames": [ + "CMakeLists.txt" + ] + }, + { + "name": "Cobol", + "extensions": [ + "cob", + "cbl", + "ccp", + "cobol", + "cpy" + ], + "filenames": [] + }, + { + "name": "CoffeeScript", + "extensions": [ + "coffee", + "_coffee", + "cake", + "cjsx", + "iced" + ], + "filenames": [ + "Cakefile" + ] + }, + { + "name": "Common Lisp", + "extensions": [ + "lisp", + "asd", + "cl", + "l", + "lsp", + "ny", + "podsl", + "sexp" + ], + "filenames": [] + }, + { + "name": "CQL", + "extensions": [ + "cql" + ], + "filenames": [] + }, + { + "name": "Crystal", + "extensions": [ + "cr" + ], + "filenames": [] + }, + { + "name": "CSS", + "extensions": [ + "css" + ], + "filenames": [] + }, + { + "name": "Cypher", + "extensions": [ + "cyp", + "cypher" + ], + "filenames": [] + }, + { + "name": "Cython", + "extensions": [ + "pyx", + "pxd", + "pxi" + ], + "filenames": [] + }, + { + "name": "D", + "extensions": [ + "d", + "di" + ], + "filenames": [] + }, + { + "name": "Dart", + "extensions": [ + "dart" + ], + "filenames": [] + }, + { + "name": "diff", + "extensions": [ + "diff", + "patch" + ], + "filenames": [] + }, + { + "name": "Dylan", + "extensions": [ + "dylan", + "dyl", + "intr", + "lid" + ], + "filenames": [] + }, + { + "name": "EBNF", + "extensions": [ + "ebnf" + ], + "filenames": [] + }, + { + "name": "ECL", + "extensions": [ + "ecl", + "eclxml" + ], + "filenames": [] + }, + { + "name": "edn", + 
"extensions": [ + "edn" + ], + "filenames": [] + }, + { + "name": "Eiffel", + "extensions": [ + "e" + ], + "filenames": [] + }, + { + "name": "Elm", + "extensions": [ + "elm" + ], + "filenames": [] + }, + { + "name": "Erlang", + "extensions": [ + "erl", + "app", + "es", + "escript", + "hrl", + "xrl", + "yrl" + ], + "filenames": [ + "Emakefile", + "rebar.config", + "rebar.config.lock", + "rebar.lock" + ] + }, + { + "name": "F#", + "extensions": [ + "fs", + "fsi", + "fsx" + ], + "filenames": [] + }, + { + "name": "Factor", + "extensions": [ + "factor" + ], + "filenames": [ + ".factor-boot-rc", + ".factor-rc" + ] + }, + { + "name": "Forth", + "extensions": [ + "fth", + "4th", + "forth", + "fr", + "frt" + ], + "filenames": [] + }, + { + "name": "Fortran", + "extensions": [ + "f", + "f77", + "for", + "fpp" + ], + "filenames": [] + }, + { + "name": "Gherkin", + "extensions": [ + "feature", + "story" + ], + "filenames": [] + }, + { + "name": "Go", + "extensions": [ + "go" + ], + "filenames": [] + }, + { + "name": "Groovy", + "extensions": [ + "groovy", + "grt", + "gtpl", + "gvy" + ], + "filenames": [ + "Jenkinsfile" + ] + }, + { + "name": "Haskell", + "extensions": [ + "hs", + "hs-boot", + "hsc" + ], + "filenames": [] + }, + { + "name": "Haxe", + "extensions": [ + "hx", + "hxsl" + ], + "filenames": [] + }, + { + "name": "HTML", + "extensions": [ + "html", + "hta", + "htm", + "xht", + "xhtml" + ], + "filenames": [] + }, + { + "name": "HTTP", + "extensions": [ + "http" + ], + "filenames": [] + }, + { + "name": "HXML", + "extensions": [ + "hxml" + ], + "filenames": [] + }, + { + "name": "IDL", + "extensions": [ + "pro", + "dlm" + ], + "filenames": [] + }, + { + "name": "Java", + "extensions": [ + "java", + "jav", + "jsh" + ], + "filenames": [] + }, + { + "name": "JavaScript", + "extensions": [ + "js", + "_js", + "bones", + "cjs", + "es", + "es6", + "frag", + "gs", + "jake", + "javascript", + "jsb", + "jscad", + "jsfl", + "jslib", + "jsm", + "jspre", + "jss", + "mjs", + 
"njs", + "pac", + "sjs", + "ssjs", + "xsjs", + "xsjslib" + ], + "filenames": [ + "Jakefile" + ] + }, + { + "name": "Jinja", + "extensions": [ + "jinja", + "j2", + "jinja2" + ], + "filenames": [] + }, + { + "name": "JSON", + "extensions": [ + "json", + "4DForm", + "4DProject", + "avsc", + "geojson", + "gltf", + "har", + "ice", + "JSON-tmLanguage", + "jsonl", + "mcmeta", + "sarif", + "tact", + "tfstate", + "topojson", + "webapp", + "webmanifest", + "yy", + "yyp" + ], + "filenames": [ + ".all-contributorsrc", + ".arcconfig", + ".auto-changelog", + ".c8rc", + ".htmlhintrc", + ".imgbotconfig", + ".nycrc", + ".tern-config", + ".tern-project", + ".watchmanconfig", + "MODULE.bazel.lock", + "Package.resolved", + "Pipfile.lock", + "bun.lock", + "composer.lock", + "deno.lock", + "flake.lock", + "mcmod.info" + ] + }, + { + "name": "JSON-LD", + "extensions": [ + "jsonld" + ], + "filenames": [] + }, + { + "name": "Julia", + "extensions": [ + "jl" + ], + "filenames": [] + }, + { + "name": "Kotlin", + "extensions": [ + "kt", + "ktm", + "kts" + ], + "filenames": [] + }, + { + "name": "LaTeX", + "extensions": [ + "tex", + "aux", + "bbx", + "cbx", + "cls", + "dtx", + "ins", + "lbx", + "ltx", + "mkii", + "mkiv", + "mkvi", + "sty", + "toc" + ], + "filenames": [] + }, + { + "name": "LESS", + "extensions": [ + "less" + ], + "filenames": [] + }, + { + "name": "Liquid", + "extensions": [ + "liquid" + ], + "filenames": [] + }, + { + "name": "LiveScript", + "extensions": [ + "ls", + "_ls" + ], + "filenames": [ + "Slakefile" + ] + }, + { + "name": "Lua", + "extensions": [ + "lua", + "nse", + "p8", + "pd_lua", + "rbxs", + "rockspec", + "wlua" + ], + "filenames": [ + ".luacheckrc" + ] + }, + { + "name": "Modelica", + "extensions": [ + "mo" + ], + "filenames": [] + }, + { + "name": "Nginx", + "extensions": [ + "nginx", + "nginxconf", + "vhost" + ], + "filenames": [ + "nginx.conf" + ] + }, + { + "name": "NSIS", + "extensions": [ + "nsi", + "nsh" + ], + "filenames": [] + }, + { + "name": 
"Objective-C", + "extensions": [], + "filenames": [] + }, + { + "name": "Objective-C++", + "extensions": [ + "mm" + ], + "filenames": [] + }, + { + "name": "OCaml", + "extensions": [ + "ml", + "eliom", + "eliomi", + "ml4", + "mli", + "mll", + "mly" + ], + "filenames": [] + }, + { + "name": "Oz", + "extensions": [ + "oz" + ], + "filenames": [] + }, + { + "name": "Pascal", + "extensions": [ + "pas", + "dfm", + "dpr", + "lpr", + "pascal" + ], + "filenames": [] + }, + { + "name": "Perl", + "extensions": [ + "pl", + "al", + "perl", + "ph", + "plx", + "pm", + "psgi", + "t" + ], + "filenames": [ + ".latexmkrc", + "Makefile.PL", + "Rexfile", + "ack", + "cpanfile", + "latexmkrc" + ] + }, + { + "name": "PHP", + "extensions": [ + "php", + "aw", + "ctp", + "php3", + "php4", + "php5", + "phps", + "phpt" + ], + "filenames": [ + ".php", + ".php_cs", + ".php_cs.dist", + "Phakefile" + ] + }, + { + "name": "PLSQL", + "extensions": [ + "pls", + "bdy", + "ddl", + "fnc", + "pck", + "pkb", + "pks", + "plb", + "plsql", + "prc", + "spc", + "tpb", + "tps", + "trg", + "vw" + ], + "filenames": [] + }, + { + "name": "PowerShell", + "extensions": [ + "ps1", + "psd1", + "psm1" + ], + "filenames": [] + }, + { + "name": "Properties files", + "extensions": [ + "ini", + "cnf", + "dof", + "lektorproject", + "prefs", + "properties", + "url", + "conf" + ], + "filenames": [ + ".buckconfig", + ".coveragerc", + ".flake8", + ".pylintrc", + "HOSTS", + "buildozer.spec", + "hosts", + "pylintrc", + "vlcrc", + ".editorconfig", + ".gitconfig", + ".npmrc" + ] + }, + { + "name": "ProtoBuf", + "extensions": [ + "proto" + ], + "filenames": [] + }, + { + "name": "Pug", + "extensions": [ + "jade", + "pug" + ], + "filenames": [] + }, + { + "name": "Puppet", + "extensions": [ + "pp" + ], + "filenames": [ + "Modulefile" + ] + }, + { + "name": "Python", + "extensions": [ + "py", + "gyp", + "gypi", + "lmi", + "py3", + "pyde", + "pyi", + "pyp", + "pyt", + "pyw", + "rpy", + "tac", + "wsgi", + "xpy" + ], + "filenames": [ + 
".gclient", + "DEPS", + "SConscript", + "SConstruct", + "wscript", + "Snakefile" + ] + }, + { + "name": "Q", + "extensions": [ + "q" + ], + "filenames": [] + }, + { + "name": "R", + "extensions": [ + "r", + "rd", + "rsx" + ], + "filenames": [ + ".Rprofile", + "expr-dist" + ] + }, + { + "name": "RPM Spec", + "extensions": [ + "spec" + ], + "filenames": [] + }, + { + "name": "Ruby", + "extensions": [ + "rb", + "builder", + "eye", + "gemspec", + "god", + "jbuilder", + "mspec", + "pluginspec", + "podspec", + "prawn", + "rabl", + "rake", + "rbi", + "rbuild", + "rbw", + "rbx", + "ru", + "ruby", + "thor", + "watchr" + ], + "filenames": [ + ".irbrc", + ".pryrc", + ".simplecov", + "Appraisals", + "Berksfile", + "Brewfile", + "Buildfile", + "Capfile", + "Dangerfile", + "Deliverfile", + "Fastfile", + "Gemfile", + "Guardfile", + "Jarfile", + "Mavenfile", + "Podfile", + "Puppetfile", + "Rakefile", + "Snapfile", + "Steepfile", + "Thorfile", + "Vagrantfile", + "buildfile" + ] + }, + { + "name": "Rust", + "extensions": [ + "rs" + ], + "filenames": [] + }, + { + "name": "SAS", + "extensions": [ + "sas" + ], + "filenames": [] + }, + { + "name": "Sass", + "extensions": [ + "sass" + ], + "filenames": [] + }, + { + "name": "Scala", + "extensions": [ + "scala", + "kojo", + "sbt", + "sc" + ], + "filenames": [] + }, + { + "name": "Scheme", + "extensions": [ + "scm", + "sch", + "sld", + "sls", + "sps", + "ss" + ], + "filenames": [] + }, + { + "name": "SCSS", + "extensions": [ + "scss" + ], + "filenames": [] + }, + { + "name": "Shell", + "extensions": [ + "sh", + "bash", + "bats", + "command", + "ksh", + "sbatch", + "slurm", + "tmux", + "tool", + "trigger", + "zsh", + "zsh-theme" + ], + "filenames": [ + ".bash_aliases", + ".bash_functions", + ".bash_history", + ".bash_logout", + ".bash_profile", + ".bashrc", + ".cshrc", + ".envrc", + ".flaskenv", + ".kshrc", + ".login", + ".profile", + ".tmux.conf", + ".xinitrc", + ".xsession", + ".zlogin", + ".zlogout", + ".zprofile", + ".zshenv", + 
".zshrc", + "9fs", + "PKGBUILD", + "bash_aliases", + "bash_logout", + "bash_profile", + "bashrc", + "cshrc", + "gradlew", + "kshrc", + "login", + "man", + "mvnw", + "profile", + "tmux.conf", + "xinitrc", + "xsession", + "zlogin", + "zlogout", + "zprofile", + "zshenv", + "zshrc" + ] + }, + { + "name": "Sieve", + "extensions": [ + "sieve" + ], + "filenames": [] + }, + { + "name": "Smalltalk", + "extensions": [ + "st" + ], + "filenames": [] + }, + { + "name": "SPARQL", + "extensions": [ + "sparql", + "rq" + ], + "filenames": [] + }, + { + "name": "SQL", + "extensions": [ + "sql", + "ddl", + "mysql", + "prc", + "tab", + "udf", + "viw" + ], + "filenames": [] + }, + { + "name": "Squirrel", + "extensions": [ + "nut" + ], + "filenames": [] + }, + { + "name": "Stylus", + "extensions": [ + "styl" + ], + "filenames": [] + }, + { + "name": "Swift", + "extensions": [ + "swift" + ], + "filenames": [] + }, + { + "name": "SystemVerilog", + "extensions": [ + "sv", + "svh", + "vh" + ], + "filenames": [] + }, + { + "name": "Tcl", + "extensions": [ + "tcl", + "adp", + "sdc", + "tm", + "xdc" + ], + "filenames": [ + "owh", + "starfield" + ] + }, + { + "name": "Textile", + "extensions": [ + "textile" + ], + "filenames": [] + }, + { + "name": "TOML", + "extensions": [ + "toml" + ], + "filenames": [ + "Cargo.lock", + "Cargo.toml.orig", + "Gopkg.lock", + "Pipfile", + "mise.local.lock", + "mise.lock", + "pdm.lock", + "poetry.lock", + "uv.lock" + ] + }, + { + "name": "TSX", + "extensions": [ + "tsx" + ], + "filenames": [] + }, + { + "name": "Turtle", + "extensions": [ + "ttl" + ], + "filenames": [] + }, + { + "name": "TypeScript", + "extensions": [ + "ts", + "cts", + "mts" + ], + "filenames": [] + }, + { + "name": "VBScript", + "extensions": [ + "vbs" + ], + "filenames": [] + }, + { + "name": "Verilog", + "extensions": [ + "veo" + ], + "filenames": [] + }, + { + "name": "VHDL", + "extensions": [ + "vhdl", + "vhd", + "vhf", + "vhi", + "vho", + "vhs", + "vht", + "vhw" + ], + "filenames": [] + 
}, + { + "name": "Vue", + "extensions": [ + "vue" + ], + "filenames": [] + }, + { + "name": "WebAssembly", + "extensions": [ + "wast", + "wat" + ], + "filenames": [] + }, + { + "name": "XML", + "extensions": [ + "xml", + "adml", + "admx", + "ant", + "axaml", + "axml", + "builds", + "ccproj", + "ccxml", + "clixml", + "cproject", + "cscfg", + "csdef", + "csl", + "csproj", + "ct", + "depproj", + "dita", + "ditamap", + "ditaval", + "dotsettings", + "filters", + "fsproj", + "fxml", + "glade", + "gml", + "gmx", + "gpx", + "grxml", + "gst", + "hzp", + "icls", + "iml", + "ivy", + "jelly", + "jsproj", + "kml", + "launch", + "mdpolicy", + "mjml", + "mod", + "mojo", + "mxml", + "natvis", + "ncl", + "ndproj", + "nproj", + "nuspec", + "odd", + "osm", + "pkgproj", + "pluginspec", + "proj", + "props", + "ps1xml", + "psc1", + "pt", + "pubxml", + "qhelp", + "rdf", + "res", + "resx", + "rss", + "sch", + "scxml", + "sfproj", + "shproj", + "slnx", + "srdf", + "storyboard", + "sublime-snippet", + "sw", + "targets", + "tml", + "typ", + "ui", + "urdf", + "ux", + "vbproj", + "vcxproj", + "vsixmanifest", + "vssettings", + "vstemplate", + "vxml", + "wixproj", + "workflow", + "wsdl", + "wsf", + "wxi", + "wxl", + "wxs", + "x3d", + "xacro", + "xaml", + "xib", + "xlf", + "xliff", + "xmi", + "xmp", + "xproj", + "xsd", + "xspec", + "xul", + "zcml" + ], + "filenames": [ + ".classpath", + ".cproject", + ".project", + "App.config", + "NuGet.config", + "Settings.StyleCop", + "Web.Debug.config", + "Web.Release.config", + "Web.config", + "packages.config" + ] + }, + { + "name": "XQuery", + "extensions": [ + "xquery", + "xq", + "xql", + "xqm", + "xqy" + ], + "filenames": [] + }, + { + "name": "YAML", + "extensions": [ + "yml", + "mir", + "reek", + "rviz", + "sublime-syntax", + "syntax", + "yaml", + "yaml-tmlanguage" + ], + "filenames": [ + ".clang-format", + ".clang-tidy", + ".clangd", + ".gemrc", + "CITATION.cff", + "glide.lock", + "pixi.lock", + "yarn.lock" + ] + } +] diff --git a/eslint.config.ts 
b/eslint.config.ts index 29016ed808..91adc06e19 100644 --- a/eslint.config.ts +++ b/eslint.config.ts @@ -570,8 +570,6 @@ export default defineConfig([ 'no-redeclare': [0], // must be disabled for typescript overloads 'no-regex-spaces': [2], 'no-restricted-exports': [0], - 'no-restricted-globals': [2, ...restrictedGlobals], - 'no-restricted-properties': [2, ...restrictedProperties], 'no-restricted-imports': [2, {paths: [ {name: 'jquery', message: 'Use the global $ instead', allowTypeImports: true}, ]}], @@ -1022,5 +1020,9 @@ export default defineConfig([ { files: ['web_src/**/*'], languageOptions: {globals: {...globals.browser, ...globals.jquery}}, + rules: { + 'no-restricted-globals': [2, ...restrictedGlobals], + 'no-restricted-properties': [2, ...restrictedProperties], + }, }, ]); diff --git a/modules/public/vitedev.go b/modules/public/vitedev.go index e6be460599..615090b05d 100644 --- a/modules/public/vitedev.go +++ b/modules/public/vitedev.go @@ -192,12 +192,13 @@ func isViteDevRequest(req *http.Request) bool { // Vite uses a path relative to project root and adds "?import" to non-JS/CSS asset imports: // - {WebSite}/public/assets/... (e.g. 
SVG icons from "{RepoRoot}/public/assets/img/svg/") - // - {WebSite}/assets/emoji.json: it is an exception for the frontend assets, it is imported by JS code, but: + // - {WebSite}/assets/*.json: exception for frontend-imported repo-root assets: // - KEEP IN MIND: all static frontend assets are served from "{AssetFS}/assets" to "{WebSite}/assets" by Gitea Web Server // - "{AssetFS}" is a layered filesystem from "{RepoRoot}/public" or embedded assets, and user's custom files in "{CustomPath}/public" - // - "{RepoRoot}/assets/emoji.json" just happens to have the dir name "assets", it is not related to frontend assets + // - "{RepoRoot}/assets/*.json" just happens to live under the dir name "assets"; it is not related to frontend assets // - BAD DESIGN: indeed it is a "conflicted and polluted name" sample - if path == "/assets/emoji.json" { + switch path { + case "/assets/emoji.json", "/assets/codemirror-languages.json": return true } return false diff --git a/tools/generate-codemirror-languages.ts b/tools/generate-codemirror-languages.ts new file mode 100755 index 0000000000..28636dee82 --- /dev/null +++ b/tools/generate-codemirror-languages.ts @@ -0,0 +1,95 @@ +#!/usr/bin/env node +import {load as parseYaml} from 'js-yaml'; +import {writeFile} from 'node:fs/promises'; +import {languages as cmLanguages} from '@codemirror/language-data'; + +const linguistUrl = 'https://raw.githubusercontent.com/github-linguist/linguist/main/lib/linguist/languages.yml'; + +const renames: Record<string, string> = { + 'Protocol Buffer': 'ProtoBuf', +}; + +// Languages whose entry is constructed manually in the runtime; skip during generation. +const skipNames = new Set(['Dockerfile', 'Markdown']); + +// Extensions claimed by several unrelated languages with no good default; strip globally.
+const ambiguousExt = new Set(['cgi', 'fcgi', 'inc']); + +// Per-language drops for non-text formats (.frm = binary VB6 forms) or where Linguist's +// primary owner conflicts with a more specialised CodeMirror mode (.spec → RPM Spec). +const excludeExt: Record<string, string[]> = { + 'INI': ['frm'], + 'Python': ['spec'], + 'Ruby': ['spec'], +}; + +// Per-CM-language additions for filenames Linguist classifies as separate languages +// (.editorconfig, .gitconfig, .npmrc) or omits entirely (Snakefile). +const extraFilenames: Record<string, string[]> = { + 'Properties files': ['.editorconfig', '.gitconfig', '.npmrc'], + 'Python': ['Snakefile'], +}; + +// Per-CM-language additions widely used in practice but absent from Linguist's list. +const extraExtensions: Record<string, string[]> = { + 'Properties files': ['conf'], +}; + +type LinguistEntry = { + type: string; + extensions?: string[]; + filenames?: string[]; +}; + +type CmLanguage = { + name: string; + extensions: string[]; + filenames: string[]; +}; + +const res = await fetch(linguistUrl); +if (!res.ok) throw new Error(`fetch ${linguistUrl} failed: ${res.status}`); +const linguist = parseYaml(await res.text()) as Record<string, LinguistEntry>; + +const cmByAlias = new Map<string, string>(); +// Map of extension -> the CM language that originally owns it. Used to prevent Linguist +// from broadening one language's extension claim into another's territory (e.g. Linguist's +// PLSQL lists .sql, but CM's SQL is the canonical owner). +const cmOriginalExtOwner = new Map<string, string>(); +for (const lang of cmLanguages) { + cmByAlias.set(lang.name.toLowerCase(), lang.name); + for (const a of lang.alias) cmByAlias.set(a.toLowerCase(), lang.name); + for (const ext of lang.extensions) { + if (!cmOriginalExtOwner.has(ext)) cmOriginalExtOwner.set(ext, lang.name); + } +} + +const out: CmLanguage[] = []; +const seen = new Set<string>(); +for (const [linguistName, entry] of Object.entries(linguist)) { + const cmName = renames[linguistName] ??
cmByAlias.get(linguistName.toLowerCase()); + // Multiple Linguist entries can alias to the same CM language (e.g. JSON5 → JSON). + if (!cmName || skipNames.has(cmName) || seen.has(cmName)) continue; + seen.add(cmName); + const exExt = new Set(excludeExt[linguistName]); + // CodeMirror's matchFilename uses /\.([^.]+)$/, so multi-dot extensions like + // ".cmake.in" can't match as extensions and are dropped here. + const extensions = (entry.extensions ?? []) + .map((e) => e.replace(/^\./, '')) + .filter((e) => { + if (e.includes('.') || ambiguousExt.has(e) || exExt.has(e)) return false; + const owner = cmOriginalExtOwner.get(e); + return !owner || owner === cmName; + }); + out.push({ + name: cmName, + extensions: [...extensions, ...(extraExtensions[cmName] ?? [])], + filenames: [...(entry.filenames ?? []), ...(extraFilenames[cmName] ?? [])], + }); +} + +out.sort((a, b) => a.name.localeCompare(b.name)); + +const outPath = new URL('../assets/codemirror-languages.json', import.meta.url); +await writeFile(outPath, `${JSON.stringify(out, null, 2)}\n`); +console.info(`wrote ${out.length} languages to ${outPath.pathname}`); diff --git a/web_src/js/modules/codeeditor/main.test.ts b/web_src/js/modules/codeeditor/main.test.ts new file mode 100644 index 0000000000..8eef2baa97 --- /dev/null +++ b/web_src/js/modules/codeeditor/main.test.ts @@ -0,0 +1,54 @@ +import {buildLanguageDescriptions, importCodemirror} from './main.ts'; + +test('matchFilename — language detection covers extended rules', async () => { + const cm = await importCodemirror(); + const list = buildLanguageDescriptions(cm); + const match = (filename: string) => + cm.language.LanguageDescription.matchFilename(list, filename)?.name; + + // Linguist-supplied filenames + extensions + expect(match('.bashrc')).toBe('Shell'); + expect(match('PKGBUILD')).toBe('Shell'); + expect(match('foo.zsh')).toBe('Shell'); + expect(match('Cargo.lock')).toBe('TOML'); + expect(match('Gemfile')).toBe('Ruby'); + 
expect(match('foo.gemspec')).toBe('Ruby'); + expect(match('foo.psgi')).toBe('Perl'); + expect(match('foo.pyi')).toBe('Python'); + expect(match('foo.webmanifest')).toBe('JSON'); + expect(match('foo.tcc')).toBe('C++'); + + // Script-side extras (extraFilenames / extraExtensions) + expect(match('.editorconfig')).toBe('Properties files'); + expect(match('foo.conf')).toBe('Properties files'); + expect(match('Snakefile')).toBe('Python'); + + // Custom Gitea entries override language-data + expect(match('Containerfile.test')).toBe('Dockerfile'); + expect(match('Dockerfile.dev')).toBe('Dockerfile'); + expect(match('Makefile.am')).toBe('Makefile'); + expect(match('foo.mk')).toBe('Makefile'); + expect(match('.env.local')).toBe('Dotenv'); + expect(match('foo.json5')).toBe('JSON5'); + expect(match('foo.mdown')).toBe('Markdown'); + + // Filename regex wins over extension match + expect(match('nginx.conf')).toBe('Nginx'); + + // .spec routes to RPM Spec via excludeExt redirect + expect(match('foo.spec')).toBe('RPM Spec'); + + // CM original ownership preserved against Linguist's broader claims (.sql is SQL, + // not PLSQL, even though Linguist's PLSQL extension list includes it). 
+ expect(match('foo.sql')).toBe('SQL'); + expect(match('foo.h')).toBe('C'); + expect(match('foo.mm')).toBe('Objective-C++'); + + // Globally ambiguous extensions fall through to plain text + expect(match('foo.cgi')).toBeUndefined(); + expect(match('foo.inc')).toBeUndefined(); + + // Smoke: existing language-data entries still resolve + expect(match('foo.go')).toBe('Go'); + expect(match('foo.tsx')).toBe('TSX'); +}); diff --git a/web_src/js/modules/codeeditor/main.ts b/web_src/js/modules/codeeditor/main.ts index e847f357cb..8feea2b475 100644 --- a/web_src/js/modules/codeeditor/main.ts +++ b/web_src/js/modules/codeeditor/main.ts @@ -41,10 +41,12 @@ export type CodemirrorEditor = { }; }; +type LinguistLanguage = {name: string; extensions: string[]; filenames: string[]}; + export type CodemirrorModules = Awaited<ReturnType<typeof importCodemirror>>; -async function importCodemirror() { - const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap] = await Promise.all([ +export async function importCodemirror() { + const [autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguist] = await Promise.all([ import('@codemirror/autocomplete'), import('@codemirror/commands'), import('@codemirror/language'), @@ -56,8 +58,77 @@ async function importCodemirror() { import('@lezer/highlight'), import('@replit/codemirror-indentation-markers'), import('@replit/codemirror-vscode-keymap'), + import('../../../../assets/codemirror-languages.json'), ]); - return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap}; + return {autocomplete, commands, language, languageData, lint, search, state, view, highlight, indentMarkers, vscodeKeymap, linguistLanguages: linguist.default as LinguistLanguage[]}; +} + +const escapeRegex = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +const filenameUnion = (filenames: string[]) => +
filenames.length ? new RegExp(`^(${filenames.map(escapeRegex).join('|')})$`) : undefined; + +export function buildLanguageDescriptions(cm: CodemirrorModules): LanguageDescription[] { + const list: LanguageDescription[] = [ + ...buildBaseLanguages(cm), + cm.language.LanguageDescription.of({ + name: 'Markdown', extensions: ['md', 'markdown', 'mkd', 'mdown', 'mdwn', 'mkdn', 'mkdown'], + load: async () => (await import('@codemirror/lang-markdown')).markdown({codeLanguages: list}), + }), + cm.language.LanguageDescription.of({ + name: 'Dockerfile', extensions: ['dockerfile', 'containerfile'], + filename: /^(Containerfile|Dockerfile)(\..+)?$/i, + load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/dockerfile')).dockerFile)), + }), + cm.language.LanguageDescription.of({ + name: 'Elixir', extensions: ['ex', 'exs'], + load: async () => (await import('codemirror-lang-elixir')).elixir(), + }), + cm.language.LanguageDescription.of({ + name: 'Nix', extensions: ['nix'], + load: async () => (await import('@replit/codemirror-lang-nix')).nix(), + }), + cm.language.LanguageDescription.of({ + name: 'Svelte', extensions: ['svelte'], + load: async () => (await import('@replit/codemirror-lang-svelte')).svelte(), + }), + cm.language.LanguageDescription.of({ + name: 'Makefile', extensions: ['mk', 'mak', 'make'], filename: /^(GNU|BSD)?[Mm]akefile(\..+)?$/, + load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)), + }), + cm.language.LanguageDescription.of({ + name: 'Dotenv', extensions: ['env'], filename: /^\.env(\..*)?$/, + load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)), + }), + cm.language.LanguageDescription.of({ + name: 'JSON5', extensions: ['json5', 'jsonc'], + load: async () => (await 
import('@codemirror/lang-json')).json(), + }), + ]; + return list; +} + +// Languages that the JSON omits because they're constructed manually above. +const customNames = new Set(['Dockerfile', 'Markdown']); + +let baseLanguagesCache: LanguageDescription[] | null = null; +function buildBaseLanguages(cm: CodemirrorModules): LanguageDescription[] { + if (baseLanguagesCache) return baseLanguagesCache; + const loadByName = new Map( + cm.languageData.languages.map((l: LanguageDescription) => [l.name, l.load.bind(l)]), + ); + const overrides = cm.linguistLanguages + .filter((l) => loadByName.has(l.name)) + .map((l) => cm.language.LanguageDescription.of({ + name: l.name, + extensions: l.extensions, + filename: filenameUnion(l.filenames), + load: loadByName.get(l.name)!, + })); + const overrideNames = new Set(overrides.map((o) => o.name)); + const fallback = cm.languageData.languages.filter( + (l: LanguageDescription) => !overrideNames.has(l.name) && !customNames.has(l.name), + ); + return baseLanguagesCache = [...overrides, ...fallback]; } function togglePreviewDisplay(previewable: boolean): void { @@ -85,38 +156,7 @@ export async function createCodeEditor(textarea: HTMLTextAreaElement, filenameIn const previewableExts = new Set(config.previewableExtensions || []); const lineWrapExts = config.lineWrapExtensions || []; const cm = await importCodemirror(); - - const languageDescriptions: LanguageDescription[] = [ - ...cm.languageData.languages.filter((l: LanguageDescription) => l.name !== 'Markdown'), - cm.language.LanguageDescription.of({ - name: 'Markdown', extensions: ['md', 'markdown', 'mkd'], - load: async () => (await import('@codemirror/lang-markdown')).markdown({codeLanguages: languageDescriptions}), - }), - cm.language.LanguageDescription.of({ - name: 'Elixir', extensions: ['ex', 'exs'], - load: async () => (await import('codemirror-lang-elixir')).elixir(), - }), - cm.language.LanguageDescription.of({ - name: 'Nix', extensions: ['nix'], - load: async () => (await 
import('@replit/codemirror-lang-nix')).nix(), - }), - cm.language.LanguageDescription.of({ - name: 'Svelte', extensions: ['svelte'], - load: async () => (await import('@replit/codemirror-lang-svelte')).svelte(), - }), - cm.language.LanguageDescription.of({ - name: 'Makefile', filename: /^(GNUm|M|m)akefile$/, - load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)), - }), - cm.language.LanguageDescription.of({ - name: 'Dotenv', extensions: ['env'], filename: /^\.env(\..*)?$/, - load: async () => new cm.language.LanguageSupport(cm.language.StreamLanguage.define((await import('@codemirror/legacy-modes/mode/shell')).shell)), - }), - cm.language.LanguageDescription.of({ - name: 'JSON5', extensions: ['json5', 'jsonc'], - load: async () => (await import('@codemirror/lang-json')).json(), - }), - ]; + const languageDescriptions = buildLanguageDescriptions(cm); const matchedLang = cm.language.LanguageDescription.matchFilename(languageDescriptions, config.filename); const container = document.createElement('div');