From c35535ce071c221fa0e529b89c3df462df769377 Mon Sep 17 00:00:00 2001
From: Gary Wang <wzc782970009@gmail.com>
Date: Mon, 1 Aug 2022 06:36:58 +0800
Subject: [PATCH] Support localized README (#20508)

* Support localized README

* Slightly simplify getting the readme file and add some tests. Ensure that i18n also
works for docs/ etc.

Signed-off-by: Andrew Thornton <art27@cantab.net>

* Update modules/markup/renderer.go

* Update modules/markup/renderer.go

* Update modules/markup/renderer.go

Co-authored-by: Andrew Thornton <art27@cantab.net>
---
 modules/markup/renderer.go      |  33 +++++++--
 modules/markup/renderer_test.go |  55 +++++++++-----
 routers/web/repo/view.go        | 123 ++++++++++++++------------------
 routers/web/repo/view_test.go   |  63 ++++++++++++++++
 4 files changed, 182 insertions(+), 92 deletions(-)
 create mode 100644 routers/web/repo/view_test.go

diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go
index e88fa31187..9278d4c65d 100644
--- a/modules/markup/renderer.go
+++ b/modules/markup/renderer.go
@@ -310,14 +310,9 @@ func IsMarkupFile(name, markup string) bool {
 }
 
 // IsReadmeFile reports whether name looks like a README file
-// based on its name. If an extension is provided, it will strictly
-// match that extension.
-// Note that the '.' should be provided in ext, e.g ".md"
-func IsReadmeFile(name string, ext ...string) bool {
+// based on its name.
+func IsReadmeFile(name string) bool {
 	name = strings.ToLower(name)
-	if len(ext) > 0 {
-		return name == "readme"+ext[0]
-	}
 	if len(name) < 6 {
 		return false
 	} else if len(name) == 6 {
@@ -325,3 +320,27 @@ func IsReadmeFile(name string, ext ...string) bool {
 	}
 	return name[:7] == "readme."
 }
+
+// IsReadmeFileExtension reports whether name looks like a README file and which
+// of the provided extensions it has, comparing case-insensitively. It returns
+// the index of the first matching extension; `readme.` followed by an unlisted
+// extension matches with index len(ext). Without a match it returns (0, false).
+// Note that the '.' should be provided in ext, e.g ".md"
+func IsReadmeFileExtension(name string, ext ...string) (int, bool) {
+	name = strings.ToLower(name)
+	if !strings.HasPrefix(name, "readme") {
+		return 0, false
+	}
+
+	suffix := name[len("readme"):]
+	for i, extension := range ext {
+		if suffix == strings.ToLower(extension) {
+			return i, true
+		}
+	}
+	// Test the suffix, not name[6]: a bare "readme" has no byte at index 6.
+	if strings.HasPrefix(suffix, ".") {
+		return len(ext), true
+	}
+	return 0, false
+}
diff --git a/modules/markup/renderer_test.go b/modules/markup/renderer_test.go
index 4cfa022463..1e0f7db194 100644
--- a/modules/markup/renderer_test.go
+++ b/modules/markup/renderer_test.go
@@ -40,24 +40,47 @@ func TestMisc_IsReadmeFile(t *testing.T) {
 		assert.False(t, IsReadmeFile(testCase))
 	}
 
-	trueTestCasesStrict := [][]string{
-		{"readme", ""},
-		{"readme.md", ".md"},
-		{"readme.txt", ".txt"},
-	}
-	falseTestCasesStrict := [][]string{
-		{"readme", ".md"},
-		{"readme.md", ""},
-		{"readme.md", ".txt"},
-		{"readme.md", "md"},
-		{"readmee.md", ".md"},
-		{"readme.i18n.md", ".md"},
+	type extensionTestcase struct {
+		name     string
+		expected bool
+		idx      int
 	}
 
-	for _, testCase := range trueTestCasesStrict {
-		assert.True(t, IsReadmeFile(testCase[0], testCase[1]))
+	exts := []string{".md", ".txt", ""}
+	testCasesExtensions := []extensionTestcase{
+		{
+			name:     "readme",
+			expected: true,
+			idx:      2,
+		},
+		{
+			name:     "readme.md",
+			expected: true,
+			idx:      0,
+		},
+		{
+			name:     "readme.txt",
+			expected: true,
+			idx:      1,
+		},
+		{
+			name:     "readme.doc",
+			expected: true,
+			idx:      3,
+		},
+		{
+			name: "readmee.md",
+		},
+		{
+			name:     "readme..",
+			expected: true,
+			idx:      3,
+		},
 	}
-	for _, testCase := range falseTestCasesStrict {
-		assert.False(t, IsReadmeFile(testCase[0], testCase[1]))
+
+	for _, testCase := range testCasesExtensions {
+		idx, ok := IsReadmeFileExtension(testCase.name, exts...)
+		assert.Equal(t, testCase.expected, ok)
+		assert.Equal(t, testCase.idx, idx)
 	}
 }
diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go
index c5657aa675..6a9c6b9bba 100644
--- a/routers/web/repo/view.go
+++ b/routers/web/repo/view.go
@@ -57,7 +57,7 @@ type namedBlob struct {
 }
 
 // FIXME: There has to be a more efficient way of doing this
-func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, error) {
+func getReadmeFileFromPath(ctx *context.Context, commit *git.Commit, treePath string) (*namedBlob, error) {
 	tree, err := commit.SubTree(treePath)
 	if err != nil {
 		return nil, err
@@ -68,50 +68,33 @@ func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, err
 		return nil, err
 	}
 
-	var readmeFiles [4]*namedBlob
-	exts := []string{".md", ".txt", ""} // sorted by priority
+	// Create a list of extensions in priority order
+	// 1. Markdown files - with and without localisation - e.g. README.en-us.md or README.md
+	// 2. Txt files - e.g. README.txt
+	// 3. No extension - e.g. README
+	exts := append(localizedExtensions(".md", ctx.Language()), ".txt", "") // sorted by priority
+	extCount := len(exts)
+	readmeFiles := make([]*namedBlob, extCount+1)
 	for _, entry := range entries {
 		if entry.IsDir() {
 			continue
 		}
-		for i, ext := range exts {
-			if markup.IsReadmeFile(entry.Name(), ext) {
-				if readmeFiles[i] == nil || base.NaturalSortLess(readmeFiles[i].name, entry.Blob().Name()) {
-					name := entry.Name()
-					isSymlink := entry.IsLink()
-					target := entry
-					if isSymlink {
-						target, err = entry.FollowLinks()
-						if err != nil && !git.IsErrBadLink(err) {
-							return nil, err
-						}
-					}
-					if target != nil && (target.IsExecutable() || target.IsRegular()) {
-						readmeFiles[i] = &namedBlob{
-							name,
-							isSymlink,
-							target.Blob(),
-						}
-					}
-				}
-			}
-		}
-
-		if markup.IsReadmeFile(entry.Name()) {
-			if readmeFiles[3] == nil || base.NaturalSortLess(readmeFiles[3].name, entry.Blob().Name()) {
+		if i, ok := markup.IsReadmeFileExtension(entry.Name(), exts...); ok {
+			if readmeFiles[i] == nil || base.NaturalSortLess(readmeFiles[i].name, entry.Blob().Name()) {
 				name := entry.Name()
 				isSymlink := entry.IsLink()
+				target := entry
 				if isSymlink {
-					entry, err = entry.FollowLinks()
+					target, err = entry.FollowLinks()
 					if err != nil && !git.IsErrBadLink(err) {
 						return nil, err
 					}
 				}
-				if entry != nil && (entry.IsExecutable() || entry.IsRegular()) {
-					readmeFiles[3] = &namedBlob{
+				if target != nil && (target.IsExecutable() || target.IsRegular()) {
+					readmeFiles[i] = &namedBlob{
 						name,
 						isSymlink,
-						entry.Blob(),
+						target.Blob(),
 					}
 				}
 			}
@@ -151,13 +134,38 @@ func renderDirectory(ctx *context.Context, treeLink string) {
 	renderReadmeFile(ctx, readmeFile, readmeTreelink)
 }
 
+// localizedExtensions builds the candidate extensions for a localized file,
+// most specific first, by prefixing ext with the lower-cased language code.
+// A region, when present, must be separated from the language with a `-`; it
+// yields the dashed form, the underscored form and the bare language.
+// The unmodified ext is always the last entry. ext should start with a `.`
+func localizedExtensions(ext, languageCode string) (localizedExts []string) {
+	if languageCode == "" {
+		return []string{ext}
+	}
+
+	prefix := "." + strings.ToLower(languageCode)
+	dash := strings.Index(prefix, "-")
+	if dash < 0 {
+		// e.g. [.en.md, .md]
+		return []string{prefix + ext, ext}
+	}
+
+	// e.g. [.zh-cn.md, .zh_cn.md, .zh.md, .md]
+	withUnderscores := strings.ReplaceAll(prefix, "-", "_")
+	return []string{prefix + ext, withUnderscores + ext, prefix[:dash] + ext, ext}
+}
+
 func findReadmeFile(ctx *context.Context, entries git.Entries, treeLink string) (*namedBlob, string) {
-	// 3 for the extensions in exts[] in order
-	// the last one is for a readme that doesn't
-	// strictly match an extension
-	var readmeFiles [4]*namedBlob
-	var docsEntries [3]*git.TreeEntry
-	exts := []string{".md", ".txt", ""} // sorted by priority
+	// Create a list of extensions in priority order
+	// 1. Markdown files - with and without localisation - e.g. README.en-us.md or README.md
+	// 2. Txt files - e.g. README.txt
+	// 3. No extension - e.g. README
+	exts := append(localizedExtensions(".md", ctx.Language()), ".txt", "") // sorted by priority
+	extCount := len(exts)
+	readmeFiles := make([]*namedBlob, extCount+1)
+
+	docsEntries := make([]*git.TreeEntry, 3) // (one of docs/, .gitea/ or .github/)
 	for _, entry := range entries {
 		if entry.IsDir() {
 			lowerName := strings.ToLower(entry.Name())
@@ -178,47 +186,24 @@ func findReadmeFile(ctx *context.Context, entries git.Entries, treeLink string)
 			continue
 		}
 
-		for i, ext := range exts {
-			if markup.IsReadmeFile(entry.Name(), ext) {
-				log.Debug("%s", entry.Name())
-				name := entry.Name()
-				isSymlink := entry.IsLink()
-				target := entry
-				if isSymlink {
-					var err error
-					target, err = entry.FollowLinks()
-					if err != nil && !git.IsErrBadLink(err) {
-						ctx.ServerError("FollowLinks", err)
-						return nil, ""
-					}
-				}
-				log.Debug("%t", target == nil)
-				if target != nil && (target.IsExecutable() || target.IsRegular()) {
-					readmeFiles[i] = &namedBlob{
-						name,
-						isSymlink,
-						target.Blob(),
-					}
-				}
-			}
-		}
-
-		if markup.IsReadmeFile(entry.Name()) {
+		if i, ok := markup.IsReadmeFileExtension(entry.Name(), exts...); ok {
+			log.Debug("Potential readme file: %s", entry.Name())
 			name := entry.Name()
 			isSymlink := entry.IsLink()
+			target := entry
 			if isSymlink {
 				var err error
-				entry, err = entry.FollowLinks()
+				target, err = entry.FollowLinks()
 				if err != nil && !git.IsErrBadLink(err) {
 					ctx.ServerError("FollowLinks", err)
 					return nil, ""
 				}
 			}
-			if entry != nil && (entry.IsExecutable() || entry.IsRegular()) {
-				readmeFiles[3] = &namedBlob{
+			if target != nil && (target.IsExecutable() || target.IsRegular()) {
+				readmeFiles[i] = &namedBlob{
 					name,
 					isSymlink,
-					entry.Blob(),
+					target.Blob(),
 				}
 			}
 		}
@@ -239,7 +224,7 @@ func findReadmeFile(ctx *context.Context, entries git.Entries, treeLink string)
 				continue
 			}
 			var err error
-			readmeFile, err = getReadmeFileFromPath(ctx.Repo.Commit, entry.GetSubJumpablePathName())
+			readmeFile, err = getReadmeFileFromPath(ctx, ctx.Repo.Commit, entry.GetSubJumpablePathName())
 			if err != nil {
 				ctx.ServerError("getReadmeFileFromPath", err)
 				return nil, ""
diff --git a/routers/web/repo/view_test.go b/routers/web/repo/view_test.go
new file mode 100644
index 0000000000..9d5a88fca4
--- /dev/null
+++ b/routers/web/repo/view_test.go
@@ -0,0 +1,63 @@
+// Copyright 2017 The Gitea Authors. All rights reserved.
+// Copyright 2014 The Gogs Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package repo
+
+import (
+	"reflect"
+	"testing"
+)
+
+func Test_localizedExtensions(t *testing.T) {
+	// Expected lists run from the most specific extension to the plain one.
+	cases := []struct {
+		name, languageCode, ext string
+		want                    []string
+	}{
+		{
+			name: "empty language",
+			ext:  ".md",
+			want: []string{".md"},
+		},
+		{
+			name:         "No region - lowercase",
+			languageCode: "en",
+			ext:          ".csv",
+			want:         []string{".en.csv", ".csv"},
+		},
+		{
+			name:         "No region - uppercase",
+			languageCode: "FR",
+			ext:          ".txt",
+			want:         []string{".fr.txt", ".txt"},
+		},
+		{
+			name:         "With region - lowercase",
+			languageCode: "en-us",
+			ext:          ".md",
+			want:         []string{".en-us.md", ".en_us.md", ".en.md", ".md"},
+		},
+		{
+			name:         "With region - uppercase",
+			languageCode: "en-CA",
+			ext:          ".MD",
+			want:         []string{".en-ca.MD", ".en_ca.MD", ".en.MD", ".MD"},
+		},
+		{
+			name:         "With region - all uppercase",
+			languageCode: "ZH-TW",
+			ext:          ".md",
+			want:         []string{".zh-tw.md", ".zh_tw.md", ".zh.md", ".md"},
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := localizedExtensions(tc.ext, tc.languageCode)
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("localizedExtensions() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}