Skip to content

Commit d4fb474

Browse files
committed
[allhic] Iteratively run pruneByDensity()
1 parent 8cddb35 commit d4fb474

File tree

1 file changed

+56
-32
lines changed

1 file changed

+56
-32
lines changed

clm.go

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -197,38 +197,54 @@ func (r *CLMFile) ParseClm() {
197197
// calculateDensities calculated the density of inter-contig links per base.
198198
// Strong contigs are considered to have high level of inter-contig links in the current
199199
// partition.
200-
func (r *CLMFile) calculateDensities() []float64 {
200+
func (r *CLMFile) calculateDensities() ([]float64, []int) {
201201
N := len(r.Tigs)
202202
densities := make([]int, N)
203203
for pair, contact := range r.contacts {
204-
densities[pair.ai] += contact.nlinks
205-
densities[pair.bi] += contact.nlinks
204+
ai := pair.ai
205+
bi := pair.bi
206+
if r.Tigs[ai].IsActive && r.Tigs[bi].IsActive {
207+
densities[ai] += contact.nlinks
208+
densities[bi] += contact.nlinks
209+
}
206210
}
207211

208-
logdensities := make([]float64, N)
212+
activeCounts, _ := r.reportActive(false)
213+
logdensities := make([]float64, activeCounts)
214+
active := make([]int, activeCounts)
215+
idx := 0
209216
for i, tig := range r.Tigs {
210-
d := float64(densities[i])
211-
s := float64(min(tig.Size, 500000))
212-
logdensities[i] = math.Log10(d / s)
217+
if tig.IsActive {
218+
d := float64(densities[i])
219+
s := float64(min(tig.Size, 500000))
220+
logdensities[idx] = math.Log10(d / s)
221+
active[idx] = tig.Idx
222+
idx++
223+
}
213224
}
214-
return logdensities
225+
return logdensities, active
215226
}
216227

217228
// pruneByDensity selects active contigs based on logdensities
218229
func (r *CLMFile) pruneByDensity() {
219-
logdensities := r.calculateDensities()
220-
lb, ub := OutlierCutoff(logdensities)
221-
log.Noticef("Log10(link_densities) ~ [%.5f, %.5f]", lb, ub)
222-
invalid := 0
223-
for i, tig := range r.Tigs {
224-
if logdensities[i] < lb && tig.Size < MINSIZE*10 {
225-
r.Tigs[i].IsActive = false
226-
invalid++
230+
for {
231+
logdensities, active := r.calculateDensities()
232+
lb, ub := OutlierCutoff(logdensities)
233+
log.Noticef("Log10(link_densities) ~ [%.5f, %.5f]", lb, ub)
234+
invalid := 0
235+
for i, idx := range active {
236+
tig := r.Tigs[idx]
237+
if logdensities[i] < lb && tig.Size < MINSIZE*10 {
238+
r.Tigs[idx].IsActive = false
239+
invalid++
240+
}
241+
}
242+
if invalid > 0 {
243+
log.Noticef("Inactivated %d tigs with log10_density < %.5f",
244+
invalid, lb)
245+
} else {
246+
break
227247
}
228-
}
229-
if invalid > 0 {
230-
log.Noticef("Inactivated %d tigs with log10_density < %.5f",
231-
invalid, lb)
232248
}
233249
}
234250

@@ -302,14 +318,16 @@ func (r *CLMFile) pruneTour() {
302318
invalid, lb)
303319
}
304320

305-
newTour.Tigs = newTour.Tigs[:0]
321+
activeCounts, _ := r.reportActive(true)
322+
newTour.Tigs = make([]Tig, activeCounts)
323+
idx := 0
306324
for _, tig := range tour.Tigs {
307325
if r.Tigs[tig.Idx].IsActive {
308-
newTour.Tigs = append(newTour.Tigs, tig)
326+
newTour.Tigs[idx] = tig
327+
idx++
309328
}
310329
}
311330
r.Tour = newTour
312-
r.reportActive()
313331
}
314332
}
315333

@@ -325,39 +343,45 @@ func (r *CLMFile) pruneTour() {
325343
// tourfile. In this case, the active contig list and orientations are
326344
// derived from the last tour in the file.
327345
func (r *CLMFile) Activate(shuffle bool) {
328-
r.reportActive()
346+
N := len(r.Tigs)
347+
r.reportActive(true)
329348
r.pruneByDensity()
330349
//r.pruneBySize()
331-
r.reportActive()
350+
activeCounts, _ := r.reportActive(true)
332351

352+
r.Tour.Tigs = make([]Tig, activeCounts)
353+
idx := 0
333354
for _, tig := range r.Tigs {
334355
if tig.IsActive {
335-
r.Tour.Tigs = append(r.Tour.Tigs, Tig{tig.Idx, tig.Size})
356+
r.Tour.Tigs[idx] = Tig{tig.Idx, tig.Size}
357+
idx++
336358
}
337359
}
338360

339361
r.Tour.M = r.M()
340362
if shuffle {
341363
r.Tour.Shuffle()
342364
}
343-
r.Signs = make([]byte, r.Tour.Len())
344-
for i := 0; i < r.Tour.Len(); i++ {
365+
r.Signs = make([]byte, N)
366+
for i := 0; i < N; i++ {
345367
r.Signs[i] = '+'
346368
}
347369
r.flipAll() // Initialize with the signs of the tigs
348370
}
349371

350372
// reportActive prints number and total length of active contigs
351-
func (r *CLMFile) reportActive() {
352-
activeCounts := 0
353-
sumLength := 0
373+
func (r *CLMFile) reportActive(verbose bool) (activeCounts, sumLength int) {
354374
for _, tig := range r.Tigs {
355375
if tig.IsActive {
356376
activeCounts++
357377
sumLength += tig.Size
358378
}
359379
}
360-
log.Noticef("Active tigs: %d (length=%d)", activeCounts, sumLength)
380+
381+
if verbose {
382+
log.Noticef("Active tigs: %d (length=%d)", activeCounts, sumLength)
383+
}
384+
return
361385
}
362386

363387
// M yields a contact frequency matrix, where each cell contains how many

0 commit comments

Comments
 (0)