@@ -197,38 +197,54 @@ func (r *CLMFile) ParseClm() {
197197// calculateDensities calculated the density of inter-contig links per base.
198198// Strong contigs are considered to have high level of inter-contig links in the current
199199// partition.
200- func (r * CLMFile ) calculateDensities () []float64 {
200+ func (r * CLMFile ) calculateDensities () ( []float64 , [] int ) {
201201 N := len (r .Tigs )
202202 densities := make ([]int , N )
203203 for pair , contact := range r .contacts {
204- densities [pair .ai ] += contact .nlinks
205- densities [pair .bi ] += contact .nlinks
204+ ai := pair .ai
205+ bi := pair .bi
206+ if r .Tigs [ai ].IsActive && r .Tigs [bi ].IsActive {
207+ densities [ai ] += contact .nlinks
208+ densities [bi ] += contact .nlinks
209+ }
206210 }
207211
208- logdensities := make ([]float64 , N )
212+ activeCounts , _ := r .reportActive (false )
213+ logdensities := make ([]float64 , activeCounts )
214+ active := make ([]int , activeCounts )
215+ idx := 0
209216 for i , tig := range r .Tigs {
210- d := float64 (densities [i ])
211- s := float64 (min (tig .Size , 500000 ))
212- logdensities [i ] = math .Log10 (d / s )
217+ if tig .IsActive {
218+ d := float64 (densities [i ])
219+ s := float64 (min (tig .Size , 500000 ))
220+ logdensities [idx ] = math .Log10 (d / s )
221+ active [idx ] = tig .Idx
222+ idx ++
223+ }
213224 }
214- return logdensities
225+ return logdensities , active
215226}
216227
217228// pruneByDensity selects active contigs based on logdensities
218229func (r * CLMFile ) pruneByDensity () {
219- logdensities := r .calculateDensities ()
220- lb , ub := OutlierCutoff (logdensities )
221- log .Noticef ("Log10(link_densities) ~ [%.5f, %.5f]" , lb , ub )
222- invalid := 0
223- for i , tig := range r .Tigs {
224- if logdensities [i ] < lb && tig .Size < MINSIZE * 10 {
225- r .Tigs [i ].IsActive = false
226- invalid ++
230+ for {
231+ logdensities , active := r .calculateDensities ()
232+ lb , ub := OutlierCutoff (logdensities )
233+ log .Noticef ("Log10(link_densities) ~ [%.5f, %.5f]" , lb , ub )
234+ invalid := 0
235+ for i , idx := range active {
236+ tig := r .Tigs [idx ]
237+ if logdensities [i ] < lb && tig .Size < MINSIZE * 10 {
238+ r .Tigs [idx ].IsActive = false
239+ invalid ++
240+ }
241+ }
242+ if invalid > 0 {
243+ log .Noticef ("Inactivated %d tigs with log10_density < %.5f" ,
244+ invalid , lb )
245+ } else {
246+ break
227247 }
228- }
229- if invalid > 0 {
230- log .Noticef ("Inactivated %d tigs with log10_density < %.5f" ,
231- invalid , lb )
232248 }
233249}
234250
@@ -302,14 +318,16 @@ func (r *CLMFile) pruneTour() {
302318 invalid , lb )
303319 }
304320
305- newTour .Tigs = newTour .Tigs [:0 ]
321+ activeCounts , _ := r .reportActive (true )
322+ newTour .Tigs = make ([]Tig , activeCounts )
323+ idx := 0
306324 for _ , tig := range tour .Tigs {
307325 if r .Tigs [tig .Idx ].IsActive {
308- newTour .Tigs = append (newTour .Tigs , tig )
326+ newTour .Tigs [idx ] = tig
327+ idx ++
309328 }
310329 }
311330 r .Tour = newTour
312- r .reportActive ()
313331 }
314332}
315333
@@ -325,39 +343,45 @@ func (r *CLMFile) pruneTour() {
325343// tourfile. In this case, the active contig list and orientations are
326344// derived from the last tour in the file.
327345func (r * CLMFile ) Activate (shuffle bool ) {
328- r .reportActive ()
346+ N := len (r .Tigs )
347+ r .reportActive (true )
329348 r .pruneByDensity ()
330349 //r.pruneBySize()
331- r .reportActive ()
350+ activeCounts , _ := r .reportActive (true )
332351
352+ r .Tour .Tigs = make ([]Tig , activeCounts )
353+ idx := 0
333354 for _ , tig := range r .Tigs {
334355 if tig .IsActive {
335- r .Tour .Tigs = append (r .Tour .Tigs , Tig {tig .Idx , tig .Size })
356+ r .Tour .Tigs [idx ] = Tig {tig .Idx , tig .Size }
357+ idx ++
336358 }
337359 }
338360
339361 r .Tour .M = r .M ()
340362 if shuffle {
341363 r .Tour .Shuffle ()
342364 }
343- r .Signs = make ([]byte , r . Tour . Len () )
344- for i := 0 ; i < r . Tour . Len () ; i ++ {
365+ r .Signs = make ([]byte , N )
366+ for i := 0 ; i < N ; i ++ {
345367 r .Signs [i ] = '+'
346368 }
347369 r .flipAll () // Initialize with the signs of the tigs
348370}
349371
350372// reportActive prints number and total length of active contigs
351- func (r * CLMFile ) reportActive () {
352- activeCounts := 0
353- sumLength := 0
373+ func (r * CLMFile ) reportActive (verbose bool ) (activeCounts , sumLength int ) {
354374 for _ , tig := range r .Tigs {
355375 if tig .IsActive {
356376 activeCounts ++
357377 sumLength += tig .Size
358378 }
359379 }
360- log .Noticef ("Active tigs: %d (length=%d)" , activeCounts , sumLength )
380+
381+ if verbose {
382+ log .Noticef ("Active tigs: %d (length=%d)" , activeCounts , sumLength )
383+ }
384+ return
361385}
362386
363387// M yields a contact frequency matrix, where each cell contains how many
0 commit comments