@@ -4,17 +4,22 @@ import (
44 "context"
55 "encoding/json"
66 "errors"
7+ "fmt"
78 "strings"
89 "sync"
910 "syscall"
11+ "time"
1012
1113 "github.com/Microsoft/hcsshim/internal/cow"
1214 "github.com/Microsoft/hcsshim/internal/hcs/schema1"
1315 hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
16+ "github.com/Microsoft/hcsshim/internal/jobobject"
1417 "github.com/Microsoft/hcsshim/internal/log"
18+ "github.com/Microsoft/hcsshim/internal/logfields"
1519 "github.com/Microsoft/hcsshim/internal/oc"
1620 "github.com/Microsoft/hcsshim/internal/timeout"
1721 "github.com/Microsoft/hcsshim/internal/vmcompute"
22+ "github.com/sirupsen/logrus"
1823 "go.opencensus.io/trace"
1924)
2025
@@ -28,7 +33,8 @@ type System struct {
2833 waitBlock chan struct {}
2934 waitError error
3035 exitError error
31- os , typ string
36+ os , typ , owner string
37+ startTime time.Time
3238}
3339
3440func newSystem (id string ) * System {
@@ -38,6 +44,11 @@ func newSystem(id string) *System {
3844 }
3945}
4046
// siloNameFmt builds the object name used to look up the silo for the container
// identified by containerID. The naming scheme is an implementation detail and
// should NOT be relied upon very heavily.
func siloNameFmt(containerID string) string {
	const siloNameFormat = `\Container_%s`
	return fmt.Sprintf(siloNameFormat, containerID)
}
51+
4152// CreateComputeSystem creates a new compute system with the given configuration but does not start it.
4253func CreateComputeSystem (ctx context.Context , id string , hcsDocumentInterface interface {}) (_ * System , err error ) {
4354 operation := "hcs::CreateComputeSystem"
@@ -127,6 +138,7 @@ func (computeSystem *System) getCachedProperties(ctx context.Context) error {
127138 }
128139 computeSystem .typ = strings .ToLower (props .SystemType )
129140 computeSystem .os = strings .ToLower (props .RuntimeOSType )
141+ computeSystem .owner = strings .ToLower (props .Owner )
130142 if computeSystem .os == "" && computeSystem .typ == "container" {
131143 // Pre-RS5 HCS did not return the OS, but it only supported containers
132144 // that ran Windows.
@@ -195,7 +207,7 @@ func (computeSystem *System) Start(ctx context.Context) (err error) {
195207 if err != nil {
196208 return makeSystemError (computeSystem , operation , err , events )
197209 }
198-
210+ computeSystem . startTime = time . Now ()
199211 return nil
200212}
201213
@@ -324,11 +336,115 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr
324336 return properties , nil
325337}
326338
327- // PropertiesV2 returns the requested container properties targeting a V2 schema container.
328- func (computeSystem * System ) PropertiesV2 (ctx context.Context , types ... hcsschema.PropertyType ) (* hcsschema.Properties , error ) {
329- computeSystem .handleLock .RLock ()
330- defer computeSystem .handleLock .RUnlock ()
339+ // queryInProc handles querying for container properties without reaching out to HCS. `props`
340+ // will be updated to contain any data returned from the queries present in `types`. If any properties
341+ // failed to be queried they will be tallied up and returned in as the first return value. Failures on
342+ // query are NOT considered errors; the only failure case for this method is if the containers job object
343+ // cannot be opened.
344+ func (computeSystem * System ) queryInProc (ctx context.Context , props * hcsschema.Properties , types []hcsschema.PropertyType ) ([]hcsschema.PropertyType , error ) {
345+ // In the future we can make use of some new functionality in the HCS that allows you
346+ // to pass a job object for HCS to use for the container. Currently, the only way we'll
347+ // be able to open the job/silo is if we're running as SYSTEM.
348+ jobOptions := & jobobject.Options {
349+ UseNTVariant : true ,
350+ Name : siloNameFmt (computeSystem .id ),
351+ }
352+ job , err := jobobject .Open (ctx , jobOptions )
353+ if err != nil {
354+ return nil , err
355+ }
356+ defer job .Close ()
357+
358+ var fallbackQueryTypes []hcsschema.PropertyType
359+ for _ , propType := range types {
360+ switch propType {
361+ case hcsschema .PTStatistics :
362+ // Handle a bad caller asking for the same type twice. No use in re-querying if this is
363+ // filled in already.
364+ if props .Statistics == nil {
365+ props .Statistics , err = computeSystem .statisticsInProc (job )
366+ if err != nil {
367+ log .G (ctx ).WithError (err ).Warn ("failed to get statistics in-proc" )
368+
369+ fallbackQueryTypes = append (fallbackQueryTypes , propType )
370+ }
371+ }
372+ default :
373+ fallbackQueryTypes = append (fallbackQueryTypes , propType )
374+ }
375+ }
376+
377+ return fallbackQueryTypes , nil
378+ }
379+
380+ // statisticsInProc emulates what HCS does to grab statistics for a given container with a small
381+ // change to make grabbing the private working set total much more efficient.
382+ func (computeSystem * System ) statisticsInProc (job * jobobject.JobObject ) (* hcsschema.Statistics , error ) {
383+ // Start timestamp for these stats before we grab them to match HCS
384+ timestamp := time .Now ()
385+
386+ memInfo , err := job .QueryMemoryStats ()
387+ if err != nil {
388+ return nil , err
389+ }
390+
391+ processorInfo , err := job .QueryProcessorStats ()
392+ if err != nil {
393+ return nil , err
394+ }
395+
396+ storageInfo , err := job .QueryStorageStats ()
397+ if err != nil {
398+ return nil , err
399+ }
400+
401+ // This calculates the private working set more efficiently than HCS does. HCS calls NtQuerySystemInformation
402+ // with the class SystemProcessInformation which returns an array containing system information for *every*
403+ // process running on the machine. They then grab the pids that are running in the container and filter down
404+ // the entries in the array to only what's running in that silo and start tallying up the total. This doesn't
405+ // work well as performance should get worse if more processess are running on the machine in general and not
406+ // just in the container. All of the additional information besides the WorkingSetPrivateSize field is ignored
407+ // as well which isn't great and is wasted work to fetch.
408+ //
409+ // HCS only let's you grab statistics in an all or nothing fashion, so we can't just grab the private
410+ // working set ourselves and ask for everything else seperately. The optimization we can make here is
411+ // to open the silo ourselves and do the same queries for the rest of the info, as well as calculating
412+ // the private working set in a more efficient manner by:
413+ //
414+ // 1. Find the pids running in the silo
415+ // 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access)
416+ // 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters
417+ // 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2.
418+ privateWorkingSet , err := job .QueryPrivateWorkingSet ()
419+ if err != nil {
420+ return nil , err
421+ }
331422
423+ return & hcsschema.Statistics {
424+ Timestamp : timestamp ,
425+ ContainerStartTime : computeSystem .startTime ,
426+ Uptime100ns : uint64 (time .Since (computeSystem .startTime ).Nanoseconds ()) / 100 ,
427+ Memory : & hcsschema.MemoryStats {
428+ MemoryUsageCommitBytes : memInfo .JobMemory ,
429+ MemoryUsageCommitPeakBytes : memInfo .PeakJobMemoryUsed ,
430+ MemoryUsagePrivateWorkingSetBytes : privateWorkingSet ,
431+ },
432+ Processor : & hcsschema.ProcessorStats {
433+ RuntimeKernel100ns : uint64 (processorInfo .TotalKernelTime ),
434+ RuntimeUser100ns : uint64 (processorInfo .TotalUserTime ),
435+ TotalRuntime100ns : uint64 (processorInfo .TotalKernelTime + processorInfo .TotalUserTime ),
436+ },
437+ Storage : & hcsschema.StorageStats {
438+ ReadCountNormalized : uint64 (storageInfo .ReadStats .IoCount ),
439+ ReadSizeBytes : storageInfo .ReadStats .TotalSize ,
440+ WriteCountNormalized : uint64 (storageInfo .WriteStats .IoCount ),
441+ WriteSizeBytes : storageInfo .WriteStats .TotalSize ,
442+ },
443+ }, nil
444+ }
445+
446+ // hcsPropertiesV2Query is a helper to make a HcsGetComputeSystemProperties call using the V2 schema property types.
447+ func (computeSystem * System ) hcsPropertiesV2Query (ctx context.Context , types []hcsschema.PropertyType ) (* hcsschema.Properties , error ) {
332448 operation := "hcs::System::PropertiesV2"
333449
334450 queryBytes , err := json .Marshal (hcsschema.PropertyQuery {PropertyTypes : types })
@@ -345,12 +461,66 @@ func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschem
345461 if propertiesJSON == "" {
346462 return nil , ErrUnexpectedValue
347463 }
348- properties := & hcsschema.Properties {}
349- if err := json .Unmarshal ([]byte (propertiesJSON ), properties ); err != nil {
464+ props := & hcsschema.Properties {}
465+ if err := json .Unmarshal ([]byte (propertiesJSON ), props ); err != nil {
350466 return nil , makeSystemError (computeSystem , operation , err , nil )
351467 }
352468
353- return properties , nil
469+ return props , nil
470+ }
471+
472+ // PropertiesV2 returns the requested compute systems properties targeting a V2 schema compute system.
473+ func (computeSystem * System ) PropertiesV2 (ctx context.Context , types ... hcsschema.PropertyType ) (_ * hcsschema.Properties , err error ) {
474+ computeSystem .handleLock .RLock ()
475+ defer computeSystem .handleLock .RUnlock ()
476+
477+ // Let HCS tally up the total for VM based queries instead of querying ourselves.
478+ if computeSystem .typ != "container" {
479+ return computeSystem .hcsPropertiesV2Query (ctx , types )
480+ }
481+
482+ // Define a starter Properties struct with the default fields returned from every
483+ // query. Owner is only returned from Statistics but it's harmless to include.
484+ properties := & hcsschema.Properties {
485+ Id : computeSystem .id ,
486+ SystemType : computeSystem .typ ,
487+ RuntimeOsType : computeSystem .os ,
488+ Owner : computeSystem .owner ,
489+ }
490+
491+ logEntry := log .G (ctx )
492+ // First lets try and query ourselves without reaching to HCS. If any of the queries fail
493+ // we'll take note and fallback to querying HCS for any of the failed types.
494+ fallbackTypes , err := computeSystem .queryInProc (ctx , properties , types )
495+ if err == nil && len (fallbackTypes ) == 0 {
496+ return properties , nil
497+ } else if err != nil {
498+ logEntry .WithError (fmt .Errorf ("failed to query compute system properties in-proc: %w" , err ))
499+ fallbackTypes = types
500+ }
501+
502+ logEntry .WithFields (logrus.Fields {
503+ logfields .ContainerID : computeSystem .id ,
504+ "propertyTypes" : fallbackTypes ,
505+ }).Info ("falling back to HCS for property type queries" )
506+
507+ hcsProperties , err := computeSystem .hcsPropertiesV2Query (ctx , fallbackTypes )
508+ if err != nil {
509+ return nil , err
510+ }
511+
512+ // Now add in anything that we might have successfully queried in process.
513+ if properties .Statistics != nil {
514+ hcsProperties .Statistics = properties .Statistics
515+ hcsProperties .Owner = properties .Owner
516+ }
517+
518+ // For future support for querying processlist in-proc as well.
519+ if properties .ProcessList != nil {
520+ hcsProperties .ProcessList = properties .ProcessList
521+ }
522+
523+ return hcsProperties , nil
354524}
355525
356526// Pause pauses the execution of the computeSystem. This feature is not enabled in TP5.
0 commit comments