1- // Copyright 2022 Google LLC
1+ // Copyright 2023 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -38,51 +38,73 @@ message InfoType {
3838
3939 // Optional version name for this InfoType.
4040 string version = 2 ;
41+
42+ // Optional custom sensitivity for this InfoType.
43+ // This only applies to data profiling.
44+ SensitivityScore sensitivity_score = 3 ;
4145}
4246
43- // Score is a summary of all elements in the data profile.
44- // A higher number means more sensitive.
47+ // Score is calculated from all elements in the data profile.
48+ // A higher level means the data is more sensitive.
4549message SensitivityScore {
46- // Various score levels for resources.
50+ // Various sensitivity score levels for resources.
4751 enum SensitivityScoreLevel {
4852 // Unused.
4953 SENSITIVITY_SCORE_UNSPECIFIED = 0 ;
5054
51- // No sensitive information detected. Limited access.
55+ // No sensitive information detected. The resource isn't publicly
56+ // accessible.
5257 SENSITIVITY_LOW = 10 ;
5358
54- // Medium risk - PII, potentially sensitive data, or fields with free-text
55- // data that are at higher risk of having intermittent sensitive data.
56- // Consider limiting access.
59+ // Medium risk. Contains personally identifiable information (PII),
60+ // potentially sensitive data, or fields with free-text data that are at a
61+ // higher risk of having intermittent sensitive data. Consider limiting
62+ // access.
5763 SENSITIVITY_MODERATE = 20 ;
5864
59- // High risk – SPII may be present. Exfiltration of data may lead to user
60- // data loss. Re-identification of users may be possible. Consider limiting
61- // usage and or removing SPII.
65+ // High risk. Sensitive personally identifiable information (SPII) can be
66+ // present. Exfiltration of data can lead to user data loss.
67+ // Re-identification of users might be possible. Consider limiting usage
68+ // and/or removing SPII.
6269 SENSITIVITY_HIGH = 30 ;
6370 }
6471
65- // The score applied to the resource.
72+ // The sensitivity score applied to the resource.
6673 SensitivityScoreLevel score = 1 ;
6774}
6875
69- // Categorization of results based on how likely they are to represent a match,
70- // based on the number of elements they contain which imply a match.
76+ // Coarse-grained confidence level of how well a particular finding
77+ // satisfies the criteria to match a particular infoType.
78+ //
79+ // Likelihood is calculated based on the number of signals a
80+ // finding has that imply that the finding matches the infoType. For
81+ // example, a string that has an '@' and a '.com' is more likely to be a
82+ // match for an email address than a string that only has an '@'.
83+ //
84+ // In general, the highest likelihood level has the strongest signals that
85+ // indicate a match. That is, a finding with a high likelihood has a low chance
86+ // of being a false positive.
87+ //
88+ // For more information about each likelihood level
89+ // and how likelihood works, see [Match
90+ // likelihood](https://cloud.google.com/dlp/docs/likelihood).
7191enum Likelihood {
7292 // Default value; same as POSSIBLE.
7393 LIKELIHOOD_UNSPECIFIED = 0 ;
7494
75- // Few matching elements .
95+ // Highest chance of a false positive.
7696 VERY_UNLIKELY = 1 ;
7797
98+ // High chance of a false positive.
7899 UNLIKELY = 2 ;
79100
80- // Some matching elements .
101+ // Some matching signals. The default value.
81102 POSSIBLE = 3 ;
82103
104+ // Low chance of a false positive.
83105 LIKELY = 4 ;
84106
85- // Many matching elements .
107+ // Confidence level is high. Lowest chance of a false positive.
86108 VERY_LIKELY = 5 ;
87109}
88110
@@ -163,9 +185,7 @@ message CustomInfoType {
163185 // output. This should be used in conjunction with a field on the
164186 // transformation such as `surrogate_info_type`. This CustomInfoType does
165187 // not support the use of `detection_rules`.
166- message SurrogateType {
167-
168- }
188+ message SurrogateType {}
169189
170190 // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
171191 // `CustomInfoType` to alter behavior under certain circumstances, depending
@@ -282,6 +302,13 @@ message CustomInfoType {
282302 // If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
283303 // to be returned. It still can be used for rules matching.
284304 ExclusionType exclusion_type = 8 ;
305+
306+ // Sensitivity for this CustomInfoType. If this CustomInfoType extends an
307+ // existing InfoType, the sensitivity here will take precedence over that of
308+ // the original InfoType. If unset for a CustomInfoType, it will default to
309+ // HIGH.
310+ // This only applies to data profiling.
311+ SensitivityScore sensitivity_score = 9 ;
285312}
286313
287314// General identifier of a data field in a storage service.
@@ -330,7 +357,7 @@ enum FileType {
330357 // scanning attempts to convert the content of the file to utf_8 to scan
331358 // the file.
332359 // If you wish to avoid this fall back, specify one or more of the other
333- // FileType's in your storage scan.
360+ // file types in your storage scan.
334361 BINARY_FILE = 1 ;
335362
336363 // Included file extensions:
@@ -343,19 +370,24 @@ enum FileType {
343370 TEXT_FILE = 2 ;
344371
345372 // Included file extensions:
346- // bmp, gif, jpg, jpeg, jpe, png.
347- // bytes_limit_per_file has no effect on image files.
348- // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
373+ // bmp, gif, jpg, jpeg, jpe, png. Setting
374+ // [bytes_limit_per_file][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file]
375+ // or
376+ // [bytes_limit_per_file_percent][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file_percent]
377+ // has no effect on image files. Image inspection is restricted to the
378+ // `global`, `us`, `asia`, and `europe` regions.
349379 IMAGE = 3 ;
350380
351- // Word files > 30 MB will be scanned as binary files.
381+ // Microsoft Word files larger than 30 MB will be scanned as binary files.
352382 // Included file extensions:
353- // docx, dotx, docm, dotm
383+ // docx, dotx, docm, dotm. Setting `bytes_limit_per_file` or
384+ // `bytes_limit_per_file_percent` has no effect on Word files.
354385 WORD = 5 ;
355386
356- // PDF files > 30 MB will be scanned as binary files.
387+ // PDF files larger than 30 MB will be scanned as binary files.
357388 // Included file extensions:
358- // pdf
389+ // pdf. Setting `bytes_limit_per_file` or `bytes_limit_per_file_percent`
390+ // has no effect on PDF files.
359391 PDF = 6 ;
360392
361393 // Included file extensions:
@@ -370,14 +402,16 @@ enum FileType {
370402 // tsv
371403 TSV = 9 ;
372404
373- // Powerpoint files >30 MB will be scanned as binary files.
374- // Included file extensions:
375- // pptx, pptm, potx, potm, pot
405+ // Microsoft PowerPoint files larger than 30 MB will be scanned as binary
406+ // files. Included file extensions:
407+ // pptx, pptm, potx, potm, pot. Setting `bytes_limit_per_file` or
408+ // `bytes_limit_per_file_percent` has no effect on PowerPoint files.
376409 POWERPOINT = 11 ;
377410
378- // Excel files > 30 MB will be scanned as binary files.
411+ // Microsoft Excel files larger than 30 MB will be scanned as binary files.
379412 // Included file extensions:
380- // xlsx, xlsm, xltx, xltm
413+ // xlsx, xlsm, xltx, xltm. Setting `bytes_limit_per_file` or
414+ // `bytes_limit_per_file_percent` has no effect on Excel files.
381415 EXCEL = 12 ;
382416}
383417
@@ -478,16 +512,22 @@ message CloudStorageOptions {
478512 FileSet file_set = 1 ;
479513
480514 // Max number of bytes to scan from a file. If a scanned file's size is bigger
481- // than this value then the rest of the bytes are omitted. Only one
482- // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
483- // Cannot be set if de-identification is requested.
515+ // than this value then the rest of the bytes are omitted. Only one of
516+ // `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be specified.
517+ // This field can't be set if de-identification is requested. For certain file
518+ // types, setting this field has no effect. For more information, see [Limits
519+ // on bytes scanned per
520+ // file](https://cloud.google.com/dlp/docs/supported-file-types#max-byte-size-per-file).
484521 int64 bytes_limit_per_file = 4 ;
485522
486523 // Max percentage of bytes to scan from a file. The rest are omitted. The
487524 // number of bytes scanned is rounded down. Must be between 0 and 100,
488- // inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
489- // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
490- // Cannot be set if de-identification is requested.
525+ // inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one of
526+ // `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be specified.
527+ // This field can't be set if de-identification is requested. For certain file
528+ // types, setting this field has no effect. For more information, see [Limits
529+ // on bytes scanned per
530+ // file](https://cloud.google.com/dlp/docs/supported-file-types#max-byte-size-per-file).
491531 int32 bytes_limit_per_file_percent = 8 ;
492532
493533 // List of file type groups to include in the scan.
@@ -565,9 +605,15 @@ message BigQueryOptions {
565605
566606 // References to fields excluded from scanning. This allows you to skip
567607 // inspection of entire columns which you know have no findings.
608+ // When inspecting a table, we recommend that you inspect all columns.
609+ // Otherwise, findings might be affected because hints from excluded columns
610+ // will not be used.
568611 repeated FieldId excluded_fields = 5 ;
569612
570613 // Limit scanning only to these fields.
614+ // When inspecting a table, we recommend that you inspect all columns.
615+ // Otherwise, findings might be affected because hints from excluded columns
616+ // will not be used.
571617 repeated FieldId included_fields = 7 ;
572618}
573619
0 commit comments