Skip to content

Commit 94cb07a

Browse files
update docs & add tests
1 parent c2dbdc0 commit 94cb07a

3 files changed

Lines changed: 144 additions & 31 deletions

File tree

packages/language/src/document.js

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ function Document(language, config) {
6767
var content = config.content || config;
6868

6969
this.api = language.api;
70-
this.encodingType = 'UTF16';
70+
this.encodingType = this.detectEncodingType_(config);
7171

7272
this.document = {};
7373

@@ -232,7 +232,8 @@ Document.PART_OF_SPEECH = {
232232
* @param {object=} options - Configuration object. See
233233
* [documents.annotateText](https://cloud.google.com/natural-language/docs/reference/rest/v1/documents/annotateText#features).
234234
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
235-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
235+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
236+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
236237
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
237238
* @param {boolean} options.entities - Detect the entities from this document.
238239
* By default, all features (`entities`, `sentiment`, and `syntax`) are
@@ -590,7 +591,8 @@ Document.prototype.annotate = function(options, callback) {
590591
* @param {object=} options - Configuration object. See
591592
* [documents.analyzeEntities](https://cloud.google.com/natural-language/reference/rest/v1/documents/analyzeEntities#request-body).
592593
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
593-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
594+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
595+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
594596
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
595597
* @param {boolean} options.verbose - Enable verbose mode for more detailed
596598
* results. Default: `false`
@@ -726,7 +728,8 @@ Document.prototype.detectEntities = function(options, callback) {
726728
* @param {object=} options - Configuration object. See
727729
* [documents.analyzeSentiment](https://cloud.google.com/natural-language/reference/rest/v1/documents/analyzeSentiment#request-body).
728730
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
729-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
731+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
732+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
730733
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
731734
* @param {boolean} options.verbose - Enable verbose mode for more detailed
732735
* results. Default: `false`
@@ -821,7 +824,8 @@ Document.prototype.detectSentiment = function(options, callback) {
821824
* @param {object=} options - Configuration object. See
822825
* [documents.analyzeSyntax](https://cloud.google.com/natural-language/reference/rest/v1/documents/analyzeSyntax#request-body).
823826
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
824-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
827+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
828+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
825829
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
826830
* @param {boolean} options.verbose - Enable verbose mode for more detailed
827831
* results. Default: `false`
@@ -1142,12 +1146,13 @@ Document.sortByProperty_ = function(propertyName) {
11421146
*
11431147
* @param {object} options - Configuration object.
11441148
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
1145-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
1149+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
1150+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
11461151
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
11471152
* @return {string} - The encodingType, as understood by the API.
11481153
*/
11491154
Document.prototype.detectEncodingType_ = function(options) {
1150-
var encoding = this.encodingType || options.encoding || options.encodingType;
1155+
var encoding = options.encoding || options.encodingType || this.encodingType;
11511156

11521157
if (!encoding) {
11531158
return;

packages/language/src/index.js

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ function Language(options) {
8282
* @param {object=} options - Configuration object. See
8383
* [documents.annotateText](https://cloud.google.com/natural-language/reference/rest/v1/documents/annotateText#request-body).
8484
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
85-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
85+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
86+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
8687
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
8788
* @param {string} options.language - The language of the text.
8889
* @param {string} options.type - The type of document, either `html` or `text`.
@@ -175,7 +176,8 @@ Language.prototype.annotate = function(content, options, callback) {
175176
* @param {object=} options - Configuration object. See
176177
* [documents.analyzeEntities](https://cloud.google.com/natural-language/reference/rest/v1/documents/analyzeEntities#request-body).
177178
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
178-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
179+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
180+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
179181
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
180182
* @param {string} options.language - The language of the text.
181183
* @param {string} options.type - The type of document, either `html` or `text`.
@@ -267,7 +269,8 @@ Language.prototype.detectEntities = function(content, options, callback) {
267269
* @param {object=} options - Configuration object. See
268270
* [documents.analyzeSentiment](https://cloud.google.com/natural-language/reference/rest/v1/documents/analyzeSentiment#request-body).
269271
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
270-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
272+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
273+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
271274
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
272275
* @param {string} options.language - The language of the text.
273276
* @param {string} options.type - The type of document, either `html` or `text`.
@@ -350,7 +353,8 @@ Language.prototype.detectSentiment = function(content, options, callback) {
350353
* @param {object=} options - Configuration object. See
351354
* [documents.analyzeSyntax](https://cloud.google.com/natural-language/reference/rest/v1/documents/analyzeSyntax#request-body).
352355
* @param {string} options.encoding - `UTF8` (also, `buffer`), `UTF16` (also
353-
* `string`), or `UTF32`. (Alias for `options.encodingType`). See
356+
* `string`), or `UTF32`. (Alias for `options.encodingType`). Default:
357+
* 'UTF8' if a Buffer, otherwise 'UTF16'. See
354358
* [`EncodingType`](https://cloud.google.com/natural-language/reference/rest/v1/EncodingType)
355359
* @param {string} options.language - The language of the text.
356360
* @param {string} options.type - The type of document, either `html` or `text`.

packages/language/test/document.js

Lines changed: 124 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,14 @@ describe('Document', function() {
5757
});
5858

5959
DocumentCache = extend(true, {}, Document);
60+
DocumentCache.prototype = extend(true, {}, Document.prototype);
6061
});
6162

6263
beforeEach(function() {
6364
isCustomTypeOverride = null;
6465

65-
for (var property in DocumentCache) {
66-
if (DocumentCache.hasOwnProperty(property)) {
67-
Document[property] = DocumentCache[property];
68-
}
69-
}
66+
extend(Document, DocumentCache);
67+
Document.prototype = extend({}, DocumentCache.prototype);
7068

7169
document = new Document(LANGUAGE, CONFIG);
7270
});
@@ -80,22 +78,29 @@ describe('Document', function() {
8078
assert(promisified);
8179
});
8280

81+
it('should set the correct encodingType', function() {
82+
var detectedEncodingType = 'detected-encoding-type';
83+
var config = {
84+
content: CONFIG
85+
};
86+
87+
Document.prototype.detectEncodingType_ = function(options) {
88+
assert.strictEqual(options, config);
89+
return detectedEncodingType;
90+
};
91+
92+
var document = new Document(LANGUAGE, config);
93+
94+
assert.strictEqual(document.encodingType, detectedEncodingType);
95+
});
96+
8397
it('should set the correct document for inline content', function() {
8498
assert.deepEqual(document.document, {
8599
content: CONFIG,
86100
type: 'PLAIN_TEXT'
87101
});
88102
});
89103

90-
it('should set and uppercase the correct encodingType', function() {
91-
var document = new Document(LANGUAGE, {
92-
content: CONFIG,
93-
encoding: 'utf-8'
94-
});
95-
96-
assert.strictEqual(document.encodingType, 'UTF8');
97-
});
98-
99104
it('should set the correct document for content with language', function() {
100105
var document = new Document(LANGUAGE, {
101106
content: CONFIG,
@@ -152,6 +157,14 @@ describe('Document', function() {
152157
encodeURIComponent(file.id),
153158
].join(''));
154159
});
160+
161+
it('should default the encodingType to UTF8 if a Buffer', function() {
162+
var document = new Document(LANGUAGE, {
163+
content: new Buffer([])
164+
});
165+
166+
assert.strictEqual(document.encodingType, 'UTF8');
167+
});
155168
});
156169

157170
describe('LABEL_DESCRIPTIONS', function() {
@@ -266,6 +279,13 @@ describe('Document', function() {
266279

267280
describe('annotate', function() {
268281
it('should make the correct API request', function(done) {
282+
var detectedEncodingType = 'detected-encoding-type';
283+
284+
document.detectEncodingType_ = function(options) {
285+
assert.deepEqual(options, {});
286+
return detectedEncodingType;
287+
};
288+
269289
document.api.Language = {
270290
annotateText: function(reqOpts) {
271291
assert.strictEqual(reqOpts.document, document.document);
@@ -276,13 +296,12 @@ describe('Document', function() {
276296
extractSyntax: true
277297
});
278298

279-
assert.strictEqual(reqOpts.encodingType, document.encodingType);
299+
assert.strictEqual(reqOpts.encodingType, detectedEncodingType);
280300

281301
done();
282302
}
283303
};
284304

285-
document.encodingType = 'encoding-type';
286305
document.annotate(assert.ifError);
287306
});
288307

@@ -542,15 +561,21 @@ describe('Document', function() {
542561

543562
describe('detectEntities', function() {
544563
it('should make the correct API request', function(done) {
564+
var detectedEncodingType = 'detected-encoding-type';
565+
566+
document.detectEncodingType_ = function(options) {
567+
assert.deepEqual(options, {});
568+
return detectedEncodingType;
569+
};
570+
545571
document.api.Language = {
546572
analyzeEntities: function(reqOpts) {
547573
assert.strictEqual(reqOpts.document, document.document);
548-
assert.strictEqual(reqOpts.encodingType, document.encodingType);
574+
assert.strictEqual(reqOpts.encodingType, detectedEncodingType);
549575
done();
550576
}
551577
};
552578

553-
document.encodingType = 'encoding-type';
554579
document.detectEntities(assert.ifError);
555580
});
556581

@@ -631,10 +656,17 @@ describe('Document', function() {
631656

632657
describe('detectSentiment', function() {
633658
it('should make the correct API request', function(done) {
659+
var detectedEncodingType = 'detected-encoding-type';
660+
661+
document.detectEncodingType_ = function(options) {
662+
assert.deepEqual(options, {});
663+
return detectedEncodingType;
664+
};
665+
634666
document.api.Language = {
635667
analyzeSentiment: function(reqOpts) {
636668
assert.strictEqual(reqOpts.document, document.document);
637-
assert.strictEqual(reqOpts.encodingType, document.encodingType);
669+
assert.strictEqual(reqOpts.encodingType, detectedEncodingType);
638670
done();
639671
}
640672
};
@@ -747,10 +779,17 @@ describe('Document', function() {
747779

748780
describe('detectSyntax', function() {
749781
it('should make the correct API request', function(done) {
782+
var detectedEncodingType = 'detected-encoding-type';
783+
784+
document.detectEncodingType_ = function(options) {
785+
assert.deepEqual(options, {});
786+
return detectedEncodingType;
787+
};
788+
750789
document.api.Language = {
751790
analyzeSyntax: function(reqOpts) {
752791
assert.strictEqual(reqOpts.document, document.document);
753-
assert.strictEqual(reqOpts.encodingType, document.encodingType);
792+
assert.strictEqual(reqOpts.encodingType, detectedEncodingType);
754793
done();
755794
}
756795
};
@@ -1102,4 +1141,69 @@ describe('Document', function() {
11021141
);
11031142
});
11041143
});
1144+
1145+
describe('detectEncodingType_', function() {
1146+
it('should return if no encoding type is set', function() {
1147+
assert.strictEqual(document.detectEncodingType_({
1148+
encoding: ''
1149+
}), undefined);
1150+
1151+
assert.strictEqual(document.detectEncodingType_({
1152+
encodingType: ''
1153+
}), undefined);
1154+
1155+
document.encodingType = '';
1156+
assert.strictEqual(document.detectEncodingType_({}), undefined);
1157+
});
1158+
1159+
it('should return UTF8 for BUFFER input', function() {
1160+
assert.strictEqual(document.detectEncodingType_({
1161+
encodingType: 'buffer'
1162+
}), 'UTF8');
1163+
});
1164+
1165+
it('should return UTF16 for STRING input', function() {
1166+
assert.strictEqual(document.detectEncodingType_({
1167+
encodingType: 'string'
1168+
}), 'UTF16');
1169+
});
1170+
1171+
it('should return original value', function() {
1172+
assert.strictEqual(document.detectEncodingType_({
1173+
encodingType: 'UTF32'
1174+
}), 'UTF32');
1175+
});
1176+
1177+
it('should capitilize and remove whitespace and hyphens', function() {
1178+
assert.strictEqual(document.detectEncodingType_({
1179+
encodingType: 'utf32'
1180+
}), 'UTF32');
1181+
1182+
assert.strictEqual(document.detectEncodingType_({
1183+
encodingType: 'UTF 32'
1184+
}), 'UTF32');
1185+
1186+
assert.strictEqual(document.detectEncodingType_({
1187+
encodingType: 'UTF-32'
1188+
}), 'UTF32');
1189+
});
1190+
1191+
it('should accept options.encoding', function() {
1192+
assert.strictEqual(document.detectEncodingType_({
1193+
encoding: 'UTF32'
1194+
}), 'UTF32');
1195+
});
1196+
1197+
it('should accept options.encodingType', function() {
1198+
assert.strictEqual(document.detectEncodingType_({
1199+
encodingType: 'UTF32'
1200+
}), 'UTF32');
1201+
});
1202+
1203+
it('should default to encodingType instance property', function() {
1204+
document.encodingType = 'utf-32';
1205+
1206+
assert.strictEqual(document.detectEncodingType_({}), 'UTF32');
1207+
});
1208+
});
11051209
});

0 commit comments

Comments
 (0)