|
| 1 | +package main |
| 2 | + |
| 3 | +import ( |
| 4 | + "regexp" |
| 5 | + "strings" |
| 6 | + "unicode" |
| 7 | +) |
| 8 | + |
var (
	// mdHeading matches Markdown H1..H6 headings: one to six "#" characters,
	// a single whitespace character, and the heading text. Group 1 is the run
	// of "#" characters, group 2 the heading text. Note that this regex may
	// produce false positives for (e.g.) comments in code-blocks
	// (# this is a comment), so it should not be used as a generic regex for
	// other purposes.
	mdHeading = regexp.MustCompile(`^([#]{1,6})\s(.*)$`)
	// htmlAnchor matches empty inline HTML anchors (<a name="x"></a> or
	// <a id=x></a>) together with any whitespace that follows them. Group 1
	// is the anchor name. The name is matched lazily and may not contain ">",
	// so an unquoted name neither swallows whitespace before the closing ">"
	// nor runs into a second anchor on the same line. This is intended to only
	// match anchors for our use-case; DO NOT consider using this as a generic
	// regex, or at least not before reading
	// https://stackoverflow.com/a/1732454/1811501.
	htmlAnchor = regexp.MustCompile(`<a\s+(?:name|id)="?([^">]+?)"?\s*></a>\s*`)
)
| 19 | + |
// getSections splits mdString on H2 ("## ") headings and returns the body of
// each section keyed by its title. Titles are lower-cased and stripped of
// surrounding whitespace, so a heading with trailing spaces or a CRLF line
// ending still maps to the expected key. Content preceding the first H2
// heading is stored under the empty key. A heading that is not followed by a
// newline (and therefore has no body) is omitted.
func getSections(mdString string) map[string]string {
	// Prepending a newline lets a heading on the very first line match the
	// "\n## " separator like any other heading.
	chunks := strings.Split("\n"+mdString, "\n## ")
	sections := make(map[string]string, len(chunks))
	for _, chunk := range chunks {
		if strings.HasPrefix(chunk, "#") {
			// The title itself starts with "#": not treated as a H2 section.
			continue
		}
		parts := strings.SplitN(chunk, "\n", 2)
		if len(parts) != 2 {
			// Title without a body.
			continue
		}
		title := strings.ToLower(strings.TrimSpace(parts[0]))
		sections[title] = parts[1]
	}
	return sections
}
| 36 | + |
| 37 | +// cleanupMarkDown cleans up the MarkDown passed in mdString for inclusion in |
| 38 | +// YAML. It removes trailing whitespace and substitutes tabs for four spaces |
| 39 | +// to prevent YAML switching to use "compact" form; ("line1 \nline\t2\n") |
| 40 | +// which, although equivalent, is hard to read. |
| 41 | +func cleanupMarkDown(mdString string) (md string, anchors []string) { |
| 42 | + // remove leading/trailing whitespace, and replace tabs in the whole content |
| 43 | + mdString = strings.TrimSpace(mdString) |
| 44 | + mdString = strings.ReplaceAll(mdString, "\t", " ") |
| 45 | + mdString = strings.ReplaceAll(mdString, "https://docs.docker.com", "") |
| 46 | + |
| 47 | + var id string |
| 48 | + // replace trailing whitespace per line, and handle custom anchors |
| 49 | + lines := strings.Split(mdString, "\n") |
| 50 | + for i := 0; i < len(lines); i++ { |
| 51 | + lines[i] = strings.TrimRightFunc(lines[i], unicode.IsSpace) |
| 52 | + lines[i], id = convertHTMLAnchor(lines[i]) |
| 53 | + if id != "" { |
| 54 | + anchors = append(anchors, id) |
| 55 | + } |
| 56 | + } |
| 57 | + return strings.Join(lines, "\n"), anchors |
| 58 | +} |
| 59 | + |
| 60 | +// convertHTMLAnchor converts inline anchor-tags in headings (<a name=myanchor></a>) |
| 61 | +// to an extended-markdown property ({#myanchor}). Extended Markdown properties |
| 62 | +// are not supported in GitHub Flavored Markdown, but are supported by Jekyll, |
| 63 | +// and lead to cleaner HTML in our docs, and prevents duplicate anchors. |
| 64 | +// It returns the converted MarkDown heading and the custom ID (if present) |
| 65 | +func convertHTMLAnchor(mdLine string) (md string, customID string) { |
| 66 | + if m := mdHeading.FindStringSubmatch(mdLine); len(m) > 0 { |
| 67 | + if a := htmlAnchor.FindStringSubmatch(m[2]); len(a) > 0 { |
| 68 | + customID = a[1] |
| 69 | + mdLine = m[1] + " " + htmlAnchor.ReplaceAllString(m[2], "") + " {#" + customID + "}" |
| 70 | + } |
| 71 | + } |
| 72 | + return mdLine, customID |
| 73 | +} |
0 commit comments