-
Notifications
You must be signed in to change notification settings - Fork 9.6k
misc(scripts): improve collision check in collect-strings #12697
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,7 +11,7 @@ | |
| const fs = require('fs'); | ||
| const glob = require('glob'); | ||
| const path = require('path'); | ||
| const assert = require('assert').strict; | ||
| const expect = require('expect'); | ||
| const tsc = require('typescript'); | ||
| const MessageParser = require('intl-messageformat-parser').default; | ||
| const Util = require('../../../report/renderer/util.js'); | ||
|
|
@@ -517,15 +517,6 @@ function parseUIStrings(sourceStr, liveUIStrings) { | |
| return parsedMessages; | ||
| } | ||
|
|
||
| /** @type {Map<string, string>} */ | ||
| const seenStrings = new Map(); | ||
|
|
||
| /** @type {number} */ | ||
| let collisions = 0; | ||
|
|
||
| /** @type {Array<string>} */ | ||
| const collisionStrings = []; | ||
|
|
||
| /** | ||
| * Collects all LHL messsages defined in UIString from Javascript files in dir, | ||
| * and converts them into CTC. | ||
|
|
@@ -584,28 +575,6 @@ function collectAllStringsInDir(dir) { | |
|
|
||
| const messageKey = `${relativeToRootPath} | ${key}`; | ||
| strings[messageKey] = ctc; | ||
|
|
||
| // check for duplicates, if duplicate, add @description as @meaning to both | ||
| if (seenStrings.has(ctc.message)) { | ||
| ctc.meaning = ctc.description; | ||
| const seenId = seenStrings.get(ctc.message); | ||
| // TODO: `strings[seenId]` check shouldn't be necessary here ... | ||
| // see https://github.com/GoogleChrome/lighthouse/pull/12441/files#r630521367 | ||
| if (seenId && strings[seenId]) { | ||
| if (!strings[seenId].meaning) { | ||
| strings[seenId].meaning = strings[seenId].description; | ||
| collisions++; | ||
| } | ||
|
|
||
| if (ctc.meaning === strings[seenId].meaning) { | ||
| throw new Error(`'${messageKey}' is an exact duplicate of '${seenId}' when placeholders are removed. Each strings' \`message\` or \`description\` must be different for the translation pipeline`); | ||
| } | ||
|
|
||
| collisionStrings.push(ctc.message); | ||
| collisions++; | ||
| } | ||
| } | ||
| seenStrings.set(ctc.message, messageKey); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -628,6 +597,98 @@ function writeStringsToCtcFiles(locale, strings) { | |
| fs.writeFileSync(fullPath, JSON.stringify(output, null, 2) + '\n'); | ||
| } | ||
|
|
||
| /** | ||
| * This function does three things: | ||
| * | ||
| * - Add `meaning` property to ctc messages that have the same message so TC can disambiguate (otherwise it fails to import). | ||
| * - Throw if the `meaning` of any collisions *also* collides (can't disambiguate messages). | ||
| * - Throw if the known collisions has changed at all. | ||
| * | ||
| * @param {Record<string, CtcMessage>} strings | ||
| */ | ||
| function resolveMessageCollisions(strings) { | ||
| /** @type {Map<string, Array<[string, CtcMessage]>>} */ | ||
| const stringsByMessage = new Map(); | ||
|
|
||
| // Group all the strings by their message. | ||
| for (const entry of Object.entries(strings)) { | ||
| const collisions = stringsByMessage.get(entry[1].message) || []; | ||
| collisions.push(entry); | ||
| stringsByMessage.set(entry[1].message, collisions); | ||
| } | ||
|
|
||
| /** @type {Array<[string, CtcMessage]>} */ | ||
| const allCollisions = []; | ||
| for (const group of stringsByMessage.values()) { | ||
| // If this message didn't collide with anything else, skip it. | ||
| if (group.length <= 1) continue; | ||
| allCollisions.push(...group); | ||
|
|
||
| // We have a message collision, time to check collisions on the `meaning` property. | ||
| /** @type {Map<string|undefined, Array<[string, CtcMessage]>>} */ | ||
| const stringsByMeaning = new Map(); | ||
| for (const [key, ctc] of group) { | ||
| ctc.meaning = ctc.description; | ||
|
|
||
| const collisions = stringsByMeaning.get(ctc.meaning) || []; | ||
| collisions.push([key, ctc]); | ||
| stringsByMeaning.set(ctc.meaning, collisions); | ||
| } | ||
|
|
||
| for (const meaningGroup of stringsByMeaning.values()) { | ||
| if (meaningGroup.length <= 1) continue; | ||
|
|
||
| const debugMeaningList = meaningGroup.map(entry => [entry[0], entry[1].meaning].join('\n')); | ||
| const debugCollisionsMessage = `${meaningGroup[0][1].message}\n\n${debugMeaningList.join('\n\n')}`; | ||
| throw new Error(`Each strings' \`message\` or \`description\` must be different for the translation pipeline. The following keys did not have unique \`meaning\` values:\n\n${debugCollisionsMessage}`); | ||
| } | ||
| } | ||
|
|
||
| // We survived fatal collisions, now check that the known collisions match our known list. | ||
| const collidingMessages = allCollisions.map(collision => collision[1].message).sort(); | ||
|
|
||
| try { | ||
| expect(collidingMessages).toEqual([ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. another option is strings and counts per string, since e.g. these first a11y strings are never going to reduce in number (they're distinct as LHL messages but not CTC ones), and neither will the stack pack ones without some major rearchitecting, but it might also be good to wait to see how this works over the next few string changes to see if it would be actually beneficial to tweak this at all |
||
| '$MARKDOWN_SNIPPET_0$ elements do not have $MARKDOWN_SNIPPET_1$ text', | ||
adamraine marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| '$MARKDOWN_SNIPPET_0$ elements do not have $MARKDOWN_SNIPPET_1$ text', | ||
| '$MARKDOWN_SNIPPET_0$ elements have $MARKDOWN_SNIPPET_1$ text', | ||
| '$MARKDOWN_SNIPPET_0$ elements have $MARKDOWN_SNIPPET_1$ text', | ||
| 'ARIA $MARKDOWN_SNIPPET_0$ elements do not have accessible names.', | ||
| 'ARIA $MARKDOWN_SNIPPET_0$ elements do not have accessible names.', | ||
| 'ARIA $MARKDOWN_SNIPPET_0$ elements do not have accessible names.', | ||
| 'ARIA $MARKDOWN_SNIPPET_0$ elements do not have accessible names.', | ||
| 'ARIA $MARKDOWN_SNIPPET_0$ elements have accessible names', | ||
| 'ARIA $MARKDOWN_SNIPPET_0$ elements have accessible names', | ||
| 'ARIA $MARKDOWN_SNIPPET_0$ elements have accessible names', | ||
| 'ARIA $MARKDOWN_SNIPPET_0$ elements have accessible names', | ||
| 'Consider uploading your GIF to a service which will make it available to embed as an HTML5 video.', | ||
| 'Consider uploading your GIF to a service which will make it available to embed as an HTML5 video.', | ||
| 'Consider uploading your GIF to a service which will make it available to embed as an HTML5 video.', | ||
| 'Consider using a $LINK_START_0$plugin$LINK_END_0$ or service that will automatically convert your uploaded images to the optimal formats.', | ||
| 'Consider using a $LINK_START_0$plugin$LINK_END_0$ or service that will automatically convert your uploaded images to the optimal formats.', | ||
| 'Document has a valid $MARKDOWN_SNIPPET_0$', | ||
| 'Document has a valid $MARKDOWN_SNIPPET_0$', | ||
| 'Failing Elements', | ||
| 'Failing Elements', | ||
| 'Name', | ||
| 'Name', | ||
| 'Potential Savings', | ||
| 'Potential Savings', | ||
| 'URL', | ||
| 'URL', | ||
| 'When an element doesn\'t have an accessible name, screen readers announce it with a generic name, making it unusable for users who rely on screen readers. $LINK_START_0$Learn more$LINK_END_0$.', | ||
| 'When an element doesn\'t have an accessible name, screen readers announce it with a generic name, making it unusable for users who rely on screen readers. $LINK_START_0$Learn more$LINK_END_0$.', | ||
| 'When an element doesn\'t have an accessible name, screen readers announce it with a generic name, making it unusable for users who rely on screen readers. $LINK_START_0$Learn more$LINK_END_0$.', | ||
| 'When an element doesn\'t have an accessible name, screen readers announce it with a generic name, making it unusable for users who rely on screen readers. $LINK_START_0$Learn more$LINK_END_0$.', | ||
| ]); | ||
| } catch (err) { | ||
| console.log('The number of duplicate strings have changed, update this assertion if that is expected, or reword strings'); | ||
| console.log('copy/paste this to pass check:'); | ||
| console.log(collidingMessages); | ||
| throw new Error(err.message); | ||
| } | ||
| } | ||
|
|
||
| // Test if called from the CLI or as a module. | ||
| if (require.main === module) { | ||
| /** @type {Record<string, CtcMessage>} */ | ||
|
|
@@ -639,10 +700,7 @@ if (require.main === module) { | |
| Object.assign(strings, moreStrings); | ||
| } | ||
|
|
||
| if (collisions > 0) { | ||
| console.log(`MEANING COLLISION: ${collisions} string(s) have the same content.`); | ||
| assert.equal(collisions, 28, `The number of duplicate strings have changed, update this assertion if that is expected, or reword strings. Collisions: ${collisionStrings.join('\n')}`); | ||
| } | ||
| resolveMessageCollisions(strings); | ||
|
|
||
| writeStringsToCtcFiles('en-US', strings); | ||
| console.log('Written to disk!', 'en-US.ctc.json'); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
did you figure out why this was previously passing/not a TC issue?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep! this was the bug I was alluding to in #12697 (comment) where the previous code only ever checked for
meaningcollisions with the first occurrence of a message and not between all occurrences of the message.No clue. If I had to guess, it would be that they haven't needed to be translated yet at the same time so they weren't colliding in TC yet? But I have no clue why this has worked so far given the way TC has been explained to me.