Skip to content

Commit 3c34a40

Browse files
Google APIs authored and
copybara-github committed
feat: new Bytes and File types: POWERPOINT and EXCEL
PiperOrigin-RevId: 437260831
1 parent 422e8b7 commit 3c34a40

2 files changed

Lines changed: 109 additions & 91 deletions

File tree

google/privacy/dlp/v2/dlp.proto

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2021 Google LLC
1+
// Copyright 2022 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@ syntax = "proto3";
1616

1717
package google.privacy.dlp.v2;
1818

19+
import "google/api/annotations.proto";
1920
import "google/api/client.proto";
2021
import "google/api/field_behavior.proto";
2122
import "google/api/resource.proto";
@@ -28,7 +29,6 @@ import "google/rpc/status.proto";
2829
import "google/type/date.proto";
2930
import "google/type/dayofweek.proto";
3031
import "google/type/timeofday.proto";
31-
import "google/api/annotations.proto";
3232

3333
option csharp_namespace = "Google.Cloud.Dlp.V2";
3434
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2;dlp";
@@ -719,8 +719,8 @@ message InspectionRuleSet {
719719
// When used with redactContent only info_types and min_likelihood are currently
720720
// used.
721721
message InspectConfig {
722-
// Configuration to control the number of findings returned. Cannot be set if
723-
// de-identification is requested.
722+
// Configuration to control the number of findings returned for inspection.
723+
// This is not used for de-identification or data profiling.
724724
message FindingLimits {
725725
// Max findings configuration per infoType, per content item or long
726726
// running DlpJob.
@@ -769,21 +769,23 @@ message InspectConfig {
769769
Likelihood min_likelihood = 2;
770770

771771
// Configuration to control the number of findings returned.
772+
// This is not used for data profiling.
772773
FindingLimits limits = 3;
773774

774775
// When true, a contextual quote from the data that triggered a finding is
775776
// included in the response; see Finding.quote.
777+
// This is not used for data profiling.
776778
bool include_quote = 4;
777779

778780
// When true, excludes type information of the findings.
781+
// This is not used for data profiling.
779782
bool exclude_info_types = 5;
780783

781784
// CustomInfoTypes provided by the user. See
782785
// https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
783786
repeated CustomInfoType custom_info_types = 6;
784787

785-
// List of options defining data content to scan.
786-
// If empty, text, images, and other content will be included.
788+
// Deprecated and unused.
787789
repeated ContentOption content_options = 8;
788790

789791
// Set of rules to apply to the findings for this InspectConfig.
@@ -825,6 +827,12 @@ message ByteContentItem {
825827
// pdf
826828
PDF = 8;
827829

830+
// pptx, pptm, potx, potm, pot
831+
POWERPOINT_DOCUMENT = 9;
832+
833+
// xlsx, xlsm, xltx, xltm
834+
EXCEL_DOCUMENT = 10;
835+
828836
// avro
829837
AVRO = 11;
830838

@@ -2857,6 +2865,18 @@ message TransformationOverview {
28572865
// Only one of 'transformation', 'field_transformation', or 'record_suppress'
28582866
// will be set.
28592867
message TransformationSummary {
2868+
// Possible outcomes of transformations.
2869+
enum TransformationResultCode {
2870+
// Unused
2871+
TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0;
2872+
2873+
// Transformation completed without an error.
2874+
SUCCESS = 1;
2875+
2876+
// Transformation had an error.
2877+
ERROR = 2;
2878+
}
2879+
28602880
// A collection that informs the user the number of times a particular
28612881
// `TransformationResultCode` and error details occurred.
28622882
message SummaryResult {
@@ -2871,18 +2891,6 @@ message TransformationSummary {
28712891
string details = 3;
28722892
}
28732893

2874-
// Possible outcomes of transformations.
2875-
enum TransformationResultCode {
2876-
// Unused
2877-
TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0;
2878-
2879-
// Transformation completed without an error.
2880-
SUCCESS = 1;
2881-
2882-
// Transformation had an error.
2883-
ERROR = 2;
2884-
}
2885-
28862894
// Set if the transformation was limited to a specific InfoType.
28872895
InfoType info_type = 1;
28882896

@@ -4352,7 +4360,7 @@ enum MatchingType {
43524360
MATCHING_TYPE_INVERSE_MATCH = 3;
43534361
}
43544362

4355-
// Options describing which parts of the provided content should be scanned.
4363+
// Deprecated and unused.
43564364
enum ContentOption {
43574365
// Includes entire content of a file or a data stream.
43584366
CONTENT_UNSPECIFIED = 0;

google/privacy/dlp/v2/storage.proto

Lines changed: 82 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2021 Google LLC
1+
// Copyright 2022 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -16,7 +16,6 @@ syntax = "proto3";
1616

1717
package google.privacy.dlp.v2;
1818

19-
import "google/api/annotations.proto";
2019
import "google/api/resource.proto";
2120
import "google/protobuf/timestamp.proto";
2221

@@ -41,18 +40,6 @@ message InfoType {
4140
string version = 2;
4241
}
4342

44-
// A reference to a StoredInfoType to use with scanning.
45-
message StoredType {
46-
// Resource name of the requested `StoredInfoType`, for example
47-
// `organizations/433245324/storedInfoTypes/432452342` or
48-
// `projects/project-id/storedInfoTypes/432452342`.
49-
string name = 1;
50-
51-
// Timestamp indicating when the version of the `StoredInfoType` used for
52-
// inspection was created. Output-only field, populated by the system.
53-
google.protobuf.Timestamp create_time = 2;
54-
}
55-
5643
// Categorization of results based on how likely they are to represent a match,
5744
// based on the number of elements they contain which imply a match.
5845
enum Likelihood {
@@ -73,6 +60,18 @@ enum Likelihood {
7360
VERY_LIKELY = 5;
7461
}
7562

63+
// A reference to a StoredInfoType to use with scanning.
64+
message StoredType {
65+
// Resource name of the requested `StoredInfoType`, for example
66+
// `organizations/433245324/storedInfoTypes/432452342` or
67+
// `projects/project-id/storedInfoTypes/432452342`.
68+
string name = 1;
69+
70+
// Timestamp indicating when the version of the `StoredInfoType` used for
71+
// inspection was created. Output-only field, populated by the system.
72+
google.protobuf.Timestamp create_time = 2;
73+
}
74+
7675
// Custom information type provided by the user. Used to find domain-specific
7776
// sensitive information configurable to the data in question.
7877
message CustomInfoType {
@@ -85,7 +84,7 @@ message CustomInfoType {
8584
// Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
8685
// will be replaced with whitespace when scanning for matches, so the
8786
// dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
88-
// Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
87+
// "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
8988
// surrounding any match must be of a different type than the adjacent
9089
// characters within the word, so letters must be next to non-letters and
9190
// digits next to non-digits. For example, the dictionary word "jen" will
@@ -98,7 +97,7 @@ message CustomInfoType {
9897
// [limits](https://cloud.google.com/dlp/limits) page contains details about
9998
// the size limits of dictionaries. For dictionaries that do not fit within
10099
// these constraints, consider using `LargeCustomDictionaryConfig` in the
101-
// [limits](https://cloud.google.com/dlp/limits) page contains details about
100+
// `StoredInfoType` API.
102101
message Dictionary {
103102
// Message defining a list of words or phrases to search for in the data.
104103
message WordList {
@@ -124,7 +123,7 @@ message CustomInfoType {
124123
// (https://github.com/google/re2/wiki/Syntax) can be found under the
125124
// google/re2 repository on GitHub.
126125
string pattern = 1;
127-
// (https://github.com/google/re2/wiki/Syntax) can be found under the
126+
128127
// The index of the submatch to extract as findings. When not
129128
// specified, the entire match is returned. No more than 3 may be included.
130129
repeated int32 group_indexes = 2;
@@ -135,10 +134,12 @@ message CustomInfoType {
135134
// [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
136135
// These types of transformations are
137136
// those that perform pseudonymization, thereby producing a "surrogate" as
138-
// [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
137+
// output. This should be used in conjunction with a field on the
139138
// transformation such as `surrogate_info_type`. This CustomInfoType does
140139
// not support the use of `detection_rules`.
141-
message SurrogateType {}
140+
message SurrogateType {
141+
142+
}
142143

143144
// Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
144145
// `CustomInfoType` to alter behavior under certain circumstances, depending
@@ -284,6 +285,67 @@ message DatastoreOptions {
284285
KindExpression kind = 2;
285286
}
286287

288+
// Definitions of file type groups to scan. New types will be added to this
289+
// list.
290+
enum FileType {
291+
// Includes all files.
292+
FILE_TYPE_UNSPECIFIED = 0;
293+
294+
// Includes all file extensions not covered by another entry. Binary
295+
// scanning attempts to convert the content of the file to UTF-8 to scan
296+
// the file.
297+
// If you wish to avoid this fallback, specify one or more of the other
298+
// FileTypes in your storage scan.
299+
BINARY_FILE = 1;
300+
301+
// Included file extensions:
302+
// asc, asp, aspx, brf, c, cc, cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
303+
// dat, dot, eml, epub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
304+
// mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
305+
// properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
306+
// shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
307+
// txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
308+
TEXT_FILE = 2;
309+
310+
// Included file extensions:
311+
// bmp, gif, jpg, jpeg, jpe, png.
312+
// bytes_limit_per_file has no effect on image files.
313+
// Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
314+
IMAGE = 3;
315+
316+
// Word files >30 MB will be scanned as binary files.
317+
// Included file extensions:
318+
// docx, dotx, docm, dotm
319+
WORD = 5;
320+
321+
// PDF files >30 MB will be scanned as binary files.
322+
// Included file extensions:
323+
// pdf
324+
PDF = 6;
325+
326+
// Included file extensions:
327+
// avro
328+
AVRO = 7;
329+
330+
// Included file extensions:
331+
// csv
332+
CSV = 8;
333+
334+
// Included file extensions:
335+
// tsv
336+
TSV = 9;
337+
338+
// Powerpoint files >30 MB will be scanned as binary files.
339+
// Included file extensions:
340+
// pptx, pptm, potx, potm, pot
341+
POWERPOINT = 11;
342+
343+
// Excel files >30 MB will be scanned as binary files.
344+
// Included file extensions:
345+
// xlsx, xlsm, xltx, xltm
346+
EXCEL = 12;
347+
}
348+
287349
// Message representing a set of files in a Cloud Storage bucket. Regular
288350
// expressions are used to allow fine-grained control over which files in the
289351
// bucket to include.
@@ -330,7 +392,7 @@ message CloudStorageRegexFileSet {
330392
// [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
331393
// under the google/re2 repository on GitHub.
332394
repeated string include_regex = 2;
333-
// [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
395+
334396
// A list of regular expressions matching file paths to exclude. All files in
335397
// the bucket that match at least one of these regular expressions will be
336398
// excluded from the scan.
@@ -339,7 +401,6 @@ message CloudStorageRegexFileSet {
339401
// [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
340402
// under the google/re2 repository on GitHub.
341403
repeated string exclude_regex = 3;
342-
// [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
343404
}
344405

345406
// Options defining a file or a set of files within a Google Cloud Storage
@@ -531,57 +592,6 @@ message StorageConfig {
531592
TimespanConfig timespan_config = 6;
532593
}
533594

534-
// Definitions of file type groups to scan. New types will be added to this
535-
// list.
536-
enum FileType {
537-
// Includes all files.
538-
FILE_TYPE_UNSPECIFIED = 0;
539-
540-
// Includes all file extensions not covered by another entry. Binary
541-
// scanning attempts to convert the content of the file to utf_8 to scan
542-
// the file.
543-
// If you wish to avoid this fall back, specify one or more of the other
544-
// FileType's in your storage scan.
545-
BINARY_FILE = 1;
546-
547-
// Included file extensions:
548-
// asc,asp, aspx, brf, c, cc,cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
549-
// dat, dot, eml,, epbub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
550-
// mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
551-
// properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
552-
// shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
553-
// txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
554-
TEXT_FILE = 2;
555-
556-
// Included file extensions:
557-
// bmp, gif, jpg, jpeg, jpe, png.
558-
// bytes_limit_per_file has no effect on image files.
559-
// Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
560-
IMAGE = 3;
561-
562-
// Word files >30 MB will be scanned as binary files.
563-
// Included file extensions:
564-
// docx, dotx, docm, dotm
565-
WORD = 5;
566-
567-
// PDF files >30 MB will be scanned as binary files.
568-
// Included file extensions:
569-
// pdf
570-
PDF = 6;
571-
572-
// Included file extensions:
573-
// avro
574-
AVRO = 7;
575-
576-
// Included file extensions:
577-
// csv
578-
CSV = 8;
579-
580-
// Included file extensions:
581-
// tsv
582-
TSV = 9;
583-
}
584-
585595
// Configuration to control jobs where the content being inspected is outside
586596
// of Google Cloud Platform.
587597
message HybridOptions {

0 commit comments

Comments
 (0)