@@ -12,23 +12,24 @@ let log (msg : string) =
1212 let d = DateTime.Now.ToString( " HH:mm:ss.fff" )
1313 printfn $" {d} {msg}"
1414
15- /// File * AST
16- type FileAST = string * ParsedInput
15+ /// File * code * AST
16+ type FileAST = string * string * ParsedInput
1717
1818type List < 'a > = System.Collections.Generic.List< 'a>
1919
2020type Node =
2121 {
2222 Name : string
23+ Code : string
2324 AST : ParsedInput
2425 Top : LongIdent
2526 ModuleRefs : LongIdent []
2627 // Order of the file in the project. Files with lower number cannot depend on files with higher number
2728 Idx : int
2829 }
2930
30- /// Filenames with dependencies
31- type Graph = ( string * string[])[]
31+ /// Filenames with sizes and dependencies
32+ type Graph = ( string * int64 * string[])[]
3233
3334let extractModuleSegments ( stuff : Stuff ) : LongIdent [] =
3435 stuff
@@ -129,11 +130,49 @@ let rec searchInTrie (trie : TrieNode) (path : LongIdent) : TrieNode option =
129130 | false , _ ->
130131 None
131132
133+ let analyseEfficiency ( files : Graph ) : unit =
134+ let totalFileSize =
135+ files
136+ |> Array.sumBy ( fun ( file , size , deps ) -> size)
137+
138+ let nodes =
139+ files
140+ |> Seq.map ( fun ( f , s , d ) -> f, ( f, s, d))
141+ |> dict
142+
143+ let depths =
144+ files
145+ |> Seq.map ( fun ( f , s , d ) -> KeyValuePair( f, - 1 L))
146+ |> Dictionary<_,_>
147+
148+ let rec dfs ( file : string ) =
149+ match depths[ file] with
150+ | - 1 L ->
151+ // Visit this node
152+ let ( f , s , d ) = nodes[ file]
153+ let deepestChild =
154+ match d with
155+ | [||] -> 0 L
156+ | d -> d |> Array.map dfs |> Array.max
157+ let depth = s + deepestChild
158+ depths[ file] <- depth
159+ printfn $" Depth[{file}] = {depth}"
160+ depth
161+ | d ->
162+ d
163+
164+ let maxDepth =
165+ files
166+ |> Array.map ( fun ( f , s , d ) -> dfs f)
167+ |> Array.max
168+
169+ printfn $" Total file size: {totalFileSize}. Max depth: {maxDepth}. Max Depth/Size = {maxDepth / totalFileSize}"
170+
132171let detectFileDependencies ( nodes : FileAST []) : Graph =
133172 // Create ASTs, extract module refs
134173 let nodes =
135174 nodes
136- |> Array.Parallel.mapi ( fun i ( name , ast ) ->
175+ |> Array.Parallel.mapi ( fun i ( name , code , ast ) ->
137176 let typeAndModuleRefs =
138177 try
139178 visit ast |> Some
@@ -146,6 +185,7 @@ let detectFileDependencies (nodes : FileAST[]) : Graph =
146185 let moduleRefs = extractModuleSegments typeAndModuleRefs
147186 {
148187 Name = name
188+ Code = code
149189 AST = ast
150190 Top = top
151191 ModuleRefs = moduleRefs
@@ -158,83 +198,86 @@ let detectFileDependencies (nodes : FileAST[]) : Graph =
158198 let trie = buildTrie nodes
159199
160200 // Find dependencies for all files (can be in parallel)
161- nodes
162- |> Array.Parallel.map ( fun node ->
163- let trie = cloneTrie trie
164-
165- // Keep a list of visited nodes (ie. all reachable nodes and all their ancestors)
166- let visited = emptyList< TrieNode>()
167-
168- let markVisited ( node : TrieNode ) =
169- if not node.Visited then
170- node.Visited <- true
171- visited.Add( node)
172-
173- // Keep a list of reachable nodes (ie. ones that can be prefixes for later module/type references)
174- let reachable = emptyList< TrieNode>()
175-
176- let markReachable ( node : TrieNode ) =
177- if not node.Reachable then
178- node.Reachable <- true
179- reachable.Add( node)
180- markVisited node
181-
182- // Mark root (no prefix) as reachable and visited
183- markReachable trie
184-
185- let rec extend ( id : LongIdent ) ( node : TrieNode ) =
186- let rec extend ( node : TrieNode ) ( id : LongIdent ) =
187- match id with
188- // Reached end of the identifier - new reachable node
189- | [] ->
190- Some node
191- // More segments exist
192- | segment :: rest ->
193- // Visit (not 'reach') the TrieNode
194- markVisited node
195- match node.Children.TryGetValue( segment.idText) with
196- // A child for the segment exists - continue there
197- | true , child ->
198- extend child rest
199- // A child for the segment doesn't exist - stop, since we don't care about the non-existent part of the Trie
200- | false , _ ->
201- None
202- extend node id
203-
204- // Process module refs in order, marking more and more TrieNodes as reachable
205- let processRef ( id : LongIdent ) =
206- let newReachables =
207- // Start at every reachable node,
208- reachable
209- // extend a reachable node by 'id', but without creating new nodes, mark all seen nodes as visited and the final one as reachable
210- |> Seq.choose ( extend id)
211- |> Seq.toArray
212- newReachables
213- |> Array.iter markReachable
214-
215- // Add top-level module/namespace as the first reference (possibly not necessary as maybe already in the list)
216- let moduleRefs =
217- Array.append [| node.Top|] node.ModuleRefs
218-
219- // Process all refs
220- moduleRefs
221- |> Array.iter processRef
222-
223- // Collect files from all visited TrieNodes
224- let reachableItems =
225- visited
226- |> Seq.collect ( fun node -> node.GraphNodes)
227- |> Seq.toArray
201+ let graph =
202+ nodes
203+ |> Array.Parallel.map ( fun node ->
204+ let trie = cloneTrie trie
228205
229- // Return the node and its dependencies
230- let deps =
231- reachableItems
232- // We know a file can't depend on a file further down in the project definition (or on itself)
233- |> Seq.filter ( fun n -> n.Idx < node.Idx)
234- |> Seq.map ( fun n -> n.Name)
235- |> Seq.toArray
236- node.Name, deps
237- )
206+ // Keep a list of visited nodes (ie. all reachable nodes and all their ancestors)
207+ let visited = emptyList< TrieNode>()
208+
209+ let markVisited ( node : TrieNode ) =
210+ if not node.Visited then
211+ node.Visited <- true
212+ visited.Add( node)
213+
214+ // Keep a list of reachable nodes (ie. ones that can be prefixes for later module/type references)
215+ let reachable = emptyList< TrieNode>()
216+
217+ let markReachable ( node : TrieNode ) =
218+ if not node.Reachable then
219+ node.Reachable <- true
220+ reachable.Add( node)
221+ markVisited node
222+
223+ // Mark root (no prefix) as reachable and visited
224+ markReachable trie
225+
226+ let rec extend ( id : LongIdent ) ( node : TrieNode ) =
227+ let rec extend ( node : TrieNode ) ( id : LongIdent ) =
228+ match id with
229+ // Reached end of the identifier - new reachable node
230+ | [] ->
231+ Some node
232+ // More segments exist
233+ | segment :: rest ->
234+ // Visit (not 'reach') the TrieNode
235+ markVisited node
236+ match node.Children.TryGetValue( segment.idText) with
237+ // A child for the segment exists - continue there
238+ | true , child ->
239+ extend child rest
240+ // A child for the segment doesn't exist - stop, since we don't care about the non-existent part of the Trie
241+ | false , _ ->
242+ None
243+ extend node id
244+
245+ // Process module refs in order, marking more and more TrieNodes as reachable
246+ let processRef ( id : LongIdent ) =
247+ let newReachables =
248+ // Start at every reachable node,
249+ reachable
250+ // extend a reachable node by 'id', but without creating new nodes, mark all seen nodes as visited and the final one as reachable
251+ |> Seq.choose ( extend id)
252+ |> Seq.toArray
253+ newReachables
254+ |> Array.iter markReachable
255+
256+ // Add top-level module/namespace as the first reference (possibly not necessary as maybe already in the list)
257+ let moduleRefs =
258+ Array.append [| node.Top|] node.ModuleRefs
259+
260+ // Process all refs
261+ moduleRefs
262+ |> Array.iter processRef
263+
264+ // Collect files from all visited TrieNodes
265+ let reachableItems =
266+ visited
267+ |> Seq.collect ( fun node -> node.GraphNodes)
268+ |> Seq.toArray
269+
270+ // Return the node and its dependencies
271+ let deps =
272+ reachableItems
273+ // We know a file can't depend on a file further down in the project definition (or on itself)
274+ |> Seq.filter ( fun n -> n.Idx < node.Idx)
275+ |> Seq.map ( fun n -> n.Name)
276+ |> Seq.toArray
277+ node.Name, ( int64) node.Code.Length, deps
278+ )
279+ analyseEfficiency graph
280+ graph
238281
239282[<Test>]
240283let TestDepsResolver () =
@@ -304,31 +347,27 @@ type B = int
304347 ]
305348 let nodes =
306349 files
307- |> List.map ( fun ( name , code ) -> name, parseSourceCode( name, code))
350+ |> List.map ( fun ( name , code ) -> name, code , parseSourceCode( name, code))
308351 |> List.toArray
309352
310353 let graph = detectFileDependencies nodes
311354
312355 printfn " Detected file dependencies:"
313356 graph
314- |> Array.iter ( fun ( file , deps ) -> printfn $" {file} -> %+A {deps}" )
357+ |> Array.iter ( fun ( file , codeSize , deps ) -> printfn $" {file} -> %+A {deps}" )
315358
316359[<Test>]
317360let Test () =
318361 log " start"
319362 let m = AnalyzerManager()
320- //let projectFile = @"C:\projekty\fsharp\fsharp_main\src\Compiler\FSharp.Compiler.Service.fsproj"
363+ // let projectFile = @"C:\projekty\fsharp\fsharp_main\src\Compiler\FSharp.Compiler.Service.fsproj"
321364 let projectFile = @" C:\projekty\fsharp\heuristic\tests\FSharp.Compiler.ComponentTests\FSharp.Compiler.ComponentTests.fsproj"
322365 let analyzer = m.GetProject( projectFile)
323366 let results = analyzer.Build()
324367 log " built"
325368
326369 let res = results.Results |> Seq.head
327370 let files = res.SourceFiles
328- // Filter out FSI files as they are not supported (ignore FSX too)
329- let files =
330- files
331- |> Array.filter ( fun f -> f.EndsWith( " .fs" ))
332371 let n =
333372 let args = Environment.GetCommandLineArgs()
334373 match args.Length with
@@ -343,20 +382,20 @@ let Test () =
343382 |> Array.Parallel.map ( fun f ->
344383 let code = System.IO.File.ReadAllText( f)
345384 let ast = getParseResults code
346- f, ast
385+ f, code , ast
347386 )
348387 let N = files.Length
349388 log $" {N} files read and parsed"
350389
351390 let graph = detectFileDependencies files
352391 log " deps detected"
353392
354- let totalDeps = graph |> Array.sumBy ( fun ( f , deps ) -> deps.Length)
393+ let totalDeps = graph |> Array.sumBy ( fun ( f , codeSize , deps ) -> deps.Length)
355394 let maxPossibleDeps = ( N * ( N-1 )) / 2
356395 //graph
357396 //|> Array.iter (fun (file, deps) -> printfn $"{file} -> %+A{deps}")
358397
359- let graph = graph |> dict
398+ let graph = graph |> Array.map ( fun ( f , codeSize , deps ) -> f , deps ) |> dict
360399 let json = JsonConvert.SerializeObject( graph, Formatting.Indented)
361400 System.IO.File.WriteAllText( " deps_graph.json" , json)
362401
0 commit comments