@@ -1430,3 +1430,53 @@ def test_sample_cohorts(sample_sets):
14301430 if sample_sets == ["AG1000G-AO" , "AG1000G-FR" ]:
14311431 assert df_coh .sample_id [0 ] == "AR0047-C"
14321432 assert df_coh .sample_id [103 ] == "AP0017-Cx"
1433+
1434+
@pytest.mark.parametrize(
    "region_raw",
    [
        "AGAP007280",
        "3L",
        "2R:48714463-48715355",
        "2L:24,630,355-24,633,221",
        Region("2R", 48714463, 48715355),
    ],
)
def test_locate_region(region_raw):
    """Exercise ``locate_region`` with each parametrized region specifier.

    The inputs cover a gene identifier, a bare contig name, two
    ``contig:start-end`` strings (one with thousands separators) and a
    ready-made ``Region`` instance.  Every case checks the types of the
    returned pair; case-specific checks follow.
    """
    ag3 = setup_ag3()
    df_genes = ag3.geneset(["ID"])
    site_slice, resolved = ag3.locate_region(region_raw)

    # SNP positions and reference alleles for the contig of the resolved region.
    positions, ref_alleles, _ = ag3.snp_sites(region=resolved.contig)

    # The call must return a slice together with a Region.
    assert isinstance(site_slice, slice)
    assert isinstance(resolved, Region)

    # A bare contig name resolves to a Region without start/end bounds.
    if region_raw == "3L":
        assert resolved.contig == "3L"
        assert resolved.start is None
        assert resolved.end is None

    # A Region given as input must come back equal to itself.
    if isinstance(region_raw, Region):
        assert resolved == region_raw

    # A gene ID must resolve to the coordinates recorded in the geneset, and
    # the located slice must select the matching positions and sequence.
    if region_raw == "AGAP007280":
        gene_row = df_genes.query("ID == 'AGAP007280'").squeeze()
        assert resolved == Region(gene_row.contig, gene_row.start, gene_row.end)

        located_pos = positions[site_slice]
        assert located_pos[0] == gene_row.start
        assert located_pos[-1] == gene_row.end

        leading_ref = ref_alleles[site_slice][:5].compute()
        expected_ref = np.array(["A", "T", "G", "G", "C"], dtype="S1")
        assert (leading_ref == expected_ref).all()

    # Region strings must be parsed into contig, start and end, with any
    # thousands separators removed.
    string_cases = (
        ("2R:48714463-48715355", "2R", 48714463, 48715355),
        ("2L:24,630,355-24,633,221", "2L", 24630355, 24633221),
    )
    for text, contig, start, end in string_cases:
        if region_raw == text:
            assert resolved == Region(contig, start, end)
0 commit comments