1- // Copyright 2021 Google LLC
1+ // Copyright 2022 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -16,7 +16,6 @@ syntax = "proto3";
1616
1717package google.privacy.dlp.v2 ;
1818
19- import "google/api/annotations.proto" ;
2019import "google/api/resource.proto" ;
2120import "google/protobuf/timestamp.proto" ;
2221
@@ -41,18 +40,6 @@ message InfoType {
4140 string version = 2 ;
4241}
4342
44- // A reference to a StoredInfoType to use with scanning.
45- message StoredType {
46- // Resource name of the requested `StoredInfoType`, for example
47- // `organizations/433245324/storedInfoTypes/432452342` or
48- // `projects/project-id/storedInfoTypes/432452342`.
49- string name = 1 ;
50-
51- // Timestamp indicating when the version of the `StoredInfoType` used for
52- // inspection was created. Output-only field, populated by the system.
53- google.protobuf.Timestamp create_time = 2 ;
54- }
55-
5643// Categorization of results based on how likely they are to represent a match,
5744// based on the number of elements they contain which imply a match.
5845enum Likelihood {
@@ -73,6 +60,18 @@ enum Likelihood {
7360 VERY_LIKELY = 5 ;
7461}
7562
63+ // A reference to a StoredInfoType to use with scanning.
64+ message StoredType {
65+ // Resource name of the requested `StoredInfoType`, for example
66+ // `organizations/433245324/storedInfoTypes/432452342` or
67+ // `projects/project-id/storedInfoTypes/432452342`.
68+ string name = 1 ;
69+
70+ // Timestamp indicating when the version of the `StoredInfoType` used for
71+ // inspection was created. Output-only field, populated by the system.
72+ google.protobuf.Timestamp create_time = 2 ;
73+ }
74+
7675// Custom information type provided by the user. Used to find domain-specific
7776// sensitive information configurable to the data in question.
7877message CustomInfoType {
@@ -85,7 +84,7 @@ message CustomInfoType {
8584 // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
8685 // will be replaced with whitespace when scanning for matches, so the
8786 // dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
88- // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
87+ // "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
8988 // surrounding any match must be of a different type than the adjacent
9089 // characters within the word, so letters must be next to non-letters and
9190 // digits next to non-digits. For example, the dictionary word "jen" will
@@ -98,7 +97,7 @@ message CustomInfoType {
9897 // [limits](https://cloud.google.com/dlp/limits) page contains details about
9998 // the size limits of dictionaries. For dictionaries that do not fit within
10099 // these constraints, consider using `LargeCustomDictionaryConfig` in the
101- // [limits](https://cloud.google.com/dlp/limits) page contains details about
100+ // `StoredInfoType` API.
102101 message Dictionary {
103102 // Message defining a list of words or phrases to search for in the data.
104103 message WordList {
@@ -124,7 +123,7 @@ message CustomInfoType {
124123 // (https://github.com/google/re2/wiki/Syntax) can be found under the
125124 // google/re2 repository on GitHub.
126125 string pattern = 1 ;
127- // (https://github.com/google/re2/wiki/Syntax) can be found under the
126+
128127 // The index of the submatch to extract as findings. When not
129128 // specified, the entire match is returned. No more than 3 may be included.
130129 repeated int32 group_indexes = 2 ;
@@ -135,10 +134,12 @@ message CustomInfoType {
135134 // [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
136135 // These types of transformations are
137136 // those that perform pseudonymization, thereby producing a "surrogate" as
138- // [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
137+ // output. This should be used in conjunction with a field on the
139138 // transformation such as `surrogate_info_type`. This CustomInfoType does
140139 // not support the use of `detection_rules`.
141- message SurrogateType {}
140+ message SurrogateType {
141+
142+ }
142143
143144 // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
144145 // `CustomInfoType` to alter behavior under certain circumstances, depending
@@ -284,6 +285,67 @@ message DatastoreOptions {
284285 KindExpression kind = 2 ;
285286}
286287
288+ // Definitions of file type groups to scan. New types will be added to this
289+ // list.
290+ enum FileType {
291+ // Includes all files.
292+ FILE_TYPE_UNSPECIFIED = 0 ;
293+
294+ // Includes all file extensions not covered by another entry. Binary
295+ // scanning attempts to convert the content of the file to UTF-8 to scan
296+ // the file.
297+ // To avoid this fallback, specify one or more of the other
298+ // `FileType` values in your storage scan.
299+ BINARY_FILE = 1 ;
300+
301+ // Included file extensions:
302+ // asc, asp, aspx, brf, c, cc, cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
303+ // dat, dot, eml, epub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
304+ // mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
305+ // properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
306+ // shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
307+ // txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
308+ TEXT_FILE = 2 ;
309+
310+ // Included file extensions:
311+ // bmp, gif, jpg, jpeg, jpe, png.
312+ // bytes_limit_per_file has no effect on image files.
313+ // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
314+ IMAGE = 3 ;
315+
316+ // Word files >30 MB will be scanned as binary files.
317+ // Included file extensions:
318+ // docx, dotx, docm, dotm
319+ WORD = 5 ;
320+
321+ // PDF files >30 MB will be scanned as binary files.
322+ // Included file extensions:
323+ // pdf
324+ PDF = 6 ;
325+
326+ // Included file extensions:
327+ // avro
328+ AVRO = 7 ;
329+
330+ // Included file extensions:
331+ // csv
332+ CSV = 8 ;
333+
334+ // Included file extensions:
335+ // tsv
336+ TSV = 9 ;
337+
338+ // PowerPoint files >30 MB will be scanned as binary files.
339+ // Included file extensions:
340+ // pptx, pptm, potx, potm, pot
341+ POWERPOINT = 11 ;
342+
343+ // Excel files >30 MB will be scanned as binary files.
344+ // Included file extensions:
345+ // xlsx, xlsm, xltx, xltm
346+ EXCEL = 12 ;
347+ }
348+
287349// Message representing a set of files in a Cloud Storage bucket. Regular
288350// expressions are used to allow fine-grained control over which files in the
289351// bucket to include.
@@ -330,7 +392,7 @@ message CloudStorageRegexFileSet {
330392 // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
331393 // under the google/re2 repository on GitHub.
332394 repeated string include_regex = 2 ;
333- // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
395+
334396 // A list of regular expressions matching file paths to exclude. All files in
335397 // the bucket that match at least one of these regular expressions will be
336398 // excluded from the scan.
@@ -339,7 +401,6 @@ message CloudStorageRegexFileSet {
339401 // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
340402 // under the google/re2 repository on GitHub.
341403 repeated string exclude_regex = 3 ;
342- // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
343404}
344405
345406// Options defining a file or a set of files within a Google Cloud Storage
@@ -531,57 +592,6 @@ message StorageConfig {
531592 TimespanConfig timespan_config = 6 ;
532593}
533594
534- // Definitions of file type groups to scan. New types will be added to this
535- // list.
536- enum FileType {
537- // Includes all files.
538- FILE_TYPE_UNSPECIFIED = 0 ;
539-
540- // Includes all file extensions not covered by another entry. Binary
541- // scanning attempts to convert the content of the file to utf_8 to scan
542- // the file.
543- // If you wish to avoid this fall back, specify one or more of the other
544- // FileType's in your storage scan.
545- BINARY_FILE = 1 ;
546-
547- // Included file extensions:
548- // asc,asp, aspx, brf, c, cc,cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
549- // dat, dot, eml,, epbub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
550- // mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
551- // properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
552- // shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
553- // txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
554- TEXT_FILE = 2 ;
555-
556- // Included file extensions:
557- // bmp, gif, jpg, jpeg, jpe, png.
558- // bytes_limit_per_file has no effect on image files.
559- // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
560- IMAGE = 3 ;
561-
562- // Word files >30 MB will be scanned as binary files.
563- // Included file extensions:
564- // docx, dotx, docm, dotm
565- WORD = 5 ;
566-
567- // PDF files >30 MB will be scanned as binary files.
568- // Included file extensions:
569- // pdf
570- PDF = 6 ;
571-
572- // Included file extensions:
573- // avro
574- AVRO = 7 ;
575-
576- // Included file extensions:
577- // csv
578- CSV = 8 ;
579-
580- // Included file extensions:
581- // tsv
582- TSV = 9 ;
583- }
584-
585595// Configuration to control jobs where the content being inspected is outside
586596// of Google Cloud Platform.
587597message HybridOptions {
0 commit comments