Skip to content

Commit e5d9878

Browse files
committed
cleaning dup codes to function
1 parent b6fd307 commit e5d9878

File tree

5 files changed

+106
-188
lines changed

5 files changed

+106
-188
lines changed

activitysim/core/interaction_sample.py

Lines changed: 7 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
simulate,
1515
tracing,
1616
workflow,
17+
util,
1718
)
1819
from activitysim.core.skim_dataset import DatasetWrapper
1920
from activitysim.core.skim_dictionary import SkimWrapper
@@ -238,63 +239,12 @@ def _interaction_sample(
238239
# if not estimation mode, drop unused columns
239240
if not have_trace_targets:
240241

241-
# keep only variables needed for spec
242-
import re
243-
244-
# define a regular expression to find variables in spec
245-
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"
246-
247-
unique_variables_in_spec = set(
248-
spec.reset_index()["Expression"]
249-
.apply(lambda x: re.findall(pattern, x))
250-
.sum()
251-
)
252-
253-
# when sharrow mode, need to keep skim variables in the chooser table
254-
# if sharrow_enabled:
255-
if locals_d:
256-
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
257-
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
258-
if locals_d.get("timeframe") == "trip":
259-
orig_col_name = locals_d.get("ORIGIN", None)
260-
dest_col_name = locals_d.get("DESTINATION", None)
261-
stop_col_name = None
262-
parking_col_name = None
263-
primary_origin_col_name = None
264-
if orig_col_name is None and "od_skims" in locals_d:
265-
orig_col_name = locals_d["od_skims"].orig_key
266-
if dest_col_name is None and "od_skims" in locals_d:
267-
dest_col_name = locals_d["od_skims"].dest_key
268-
if stop_col_name is None and "dp_skims" in locals_d:
269-
stop_col_name = locals_d["dp_skims"].dest_key
270-
if primary_origin_col_name is None and "dnt_skims" in locals_d:
271-
primary_origin_col_name = locals_d["dnt_skims"].dest_key
272-
unique_variables_in_spec.add(orig_col_name)
273-
unique_variables_in_spec.add(dest_col_name)
274-
unique_variables_in_spec.add(parking_col_name)
275-
unique_variables_in_spec.add(primary_origin_col_name)
276-
unique_variables_in_spec.add(stop_col_name)
277-
unique_variables_in_spec.add("trip_period")
278-
unique_variables_in_spec.add("purpose_index_num")
279-
280-
unique_variables_in_spec.add("proto_person_id")
281-
unique_variables_in_spec.add("person_id")
282-
unique_variables_in_spec.add("tour_id")
283-
unique_variables_in_spec.add("tour_mode")
284-
logger.info("Dropping unused variables in chooser table")
285-
286-
logger.info(
287-
"before dropping, the choosers table has {} columns: {}".format(
288-
len(choosers.columns), choosers.columns
289-
)
290-
)
291-
for c in choosers.columns:
292-
if c not in unique_variables_in_spec:
293-
choosers = choosers.drop(c, axis=1)
294-
logger.info(
295-
"after dropping, the choosers table has {} columns: {}".format(
296-
len(choosers.columns), choosers.columns
297-
)
242+
choosers = util.drop_unused_chooser_columns(
243+
choosers,
244+
spec,
245+
locals_d,
246+
custom_chooser=None,
247+
sharrow_enabled=sharrow_enabled,
298248
)
299249

300250
if sharrow_enabled:

activitysim/core/interaction_sample_simulate.py

Lines changed: 8 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
import pandas as pd
99

10-
from activitysim.core import chunk, interaction_simulate, logit, tracing, workflow
10+
from activitysim.core import chunk, interaction_simulate, logit, tracing, workflow, util
1111
from activitysim.core.simulate import set_skim_wrapper_targets
1212

1313
logger = logging.getLogger(__name__)
@@ -136,62 +136,20 @@ def _interaction_sample_simulate(
136136
)
137137

138138
# drop variables before the interaction dataframe is created
139+
sharrow_enabled = state.settings.sharrow
139140

140141
# check if tracing is enabled and if we have trace targets
141142
# if not estimation mode, drop unused columns
142143
if not have_trace_targets:
143144

144-
# keep only variables needed for spec
145-
import re
146-
147-
# define a regular expression to find variables in spec
148-
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"
149-
150-
unique_variables_in_spec = set(
151-
spec.reset_index()["Expression"]
152-
.apply(lambda x: re.findall(pattern, x))
153-
.sum()
145+
choosers = util.drop_unused_chooser_columns(
146+
choosers,
147+
spec,
148+
locals_d,
149+
custom_chooser=None,
150+
sharrow_enabled=sharrow_enabled,
154151
)
155152

156-
# when sharrow mode, need to keep skim variables in the chooser table
157-
# if sharrow_enabled:
158-
if locals_d:
159-
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
160-
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
161-
if locals_d.get("timeframe") == "trip":
162-
orig_col_name = locals_d.get("ORIGIN", None)
163-
dest_col_name = locals_d.get("DESTINATION", None)
164-
stop_col_name = None
165-
parking_col_name = None
166-
primary_origin_col_name = None
167-
if orig_col_name is None and "od_skims" in locals_d:
168-
orig_col_name = locals_d["od_skims"].orig_key
169-
if dest_col_name is None and "od_skims" in locals_d:
170-
dest_col_name = locals_d["od_skims"].dest_key
171-
if stop_col_name is None and "dp_skims" in locals_d:
172-
stop_col_name = locals_d["dp_skims"].dest_key
173-
if primary_origin_col_name is None and "dnt_skims" in locals_d:
174-
primary_origin_col_name = locals_d["dnt_skims"].dest_key
175-
unique_variables_in_spec.add(orig_col_name)
176-
unique_variables_in_spec.add(dest_col_name)
177-
unique_variables_in_spec.add(parking_col_name)
178-
unique_variables_in_spec.add(primary_origin_col_name)
179-
unique_variables_in_spec.add(stop_col_name)
180-
unique_variables_in_spec.add("trip_period")
181-
unique_variables_in_spec.add("purpose_index_num")
182-
183-
unique_variables_in_spec.add("proto_person_id")
184-
unique_variables_in_spec.add("person_id")
185-
unique_variables_in_spec.add("tour_id")
186-
unique_variables_in_spec.add("tour_mode")
187-
unique_variables_in_spec.add("household_id")
188-
unique_variables_in_spec.add("parent_tour_id")
189-
logger.info("Dropping unused variables in chooser table")
190-
191-
for c in choosers.columns:
192-
if c not in unique_variables_in_spec:
193-
choosers = choosers.drop(c, axis=1)
194-
195153
interaction_df = alternatives.join(choosers, how="left", rsuffix="_chooser")
196154
logger.info(
197155
f"{trace_label} end merging choosers and alternatives to create interaction_df"

activitysim/core/interaction_simulate.py

Lines changed: 7 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import pandas as pd
1414

1515
from . import chunk, config, logit, simulate, tracing, workflow
16+
from activitysim.core import util
1617

1718
logger = logging.getLogger(__name__)
1819

@@ -702,53 +703,14 @@ def _interaction_simulate(
702703
# if not estimation mode, drop unused columns
703704
if (not have_trace_targets) and (estimator is None):
704705

705-
# drop_variable = True
706-
707-
# keep only variables needed for spec
708-
import re
709-
710-
# define a regular expression to find variables in spec
711-
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"
712-
713-
unique_variables_in_spec = set(
714-
spec.reset_index()["Expression"]
715-
.apply(lambda x: re.findall(pattern, x))
716-
.sum()
706+
choosers = util.drop_unused_chooser_columns(
707+
choosers,
708+
spec,
709+
locals_d,
710+
custom_chooser=None,
711+
sharrow_enabled=sharrow_enabled,
717712
)
718713

719-
# when sharrow mode, need to keep skim variables in the chooser table
720-
if sharrow_enabled:
721-
if locals_d:
722-
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
723-
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
724-
if locals_d.get("timeframe") == "trip":
725-
orig_col_name = locals_d.get("ORIGIN", None)
726-
dest_col_name = locals_d.get("DESTINATION", None)
727-
stop_col_name = None
728-
parking_col_name = None
729-
primary_origin_col_name = None
730-
if orig_col_name is None and "od_skims" in locals_d:
731-
orig_col_name = locals_d["od_skims"].orig_key
732-
if dest_col_name is None and "od_skims" in locals_d:
733-
dest_col_name = locals_d["od_skims"].dest_key
734-
if stop_col_name is None and "dp_skims" in locals_d:
735-
stop_col_name = locals_d["dp_skims"].dest_key
736-
if primary_origin_col_name is None and "dnt_skims" in locals_d:
737-
primary_origin_col_name = locals_d["dnt_skims"].dest_key
738-
unique_variables_in_spec.add(orig_col_name)
739-
unique_variables_in_spec.add(dest_col_name)
740-
unique_variables_in_spec.add(parking_col_name)
741-
unique_variables_in_spec.add(primary_origin_col_name)
742-
unique_variables_in_spec.add(stop_col_name)
743-
unique_variables_in_spec.add("trip_period")
744-
unique_variables_in_spec.add("purpose_index_num")
745-
746-
logger.info("Dropping unused variables in chooser table")
747-
748-
for c in choosers.columns:
749-
if c not in unique_variables_in_spec:
750-
choosers = choosers.drop(c, axis=1)
751-
752714
if locals_d is not None and locals_d.get("_sharrow_skip", False):
753715
sharrow_enabled = False
754716

activitysim/core/simulate.py

Lines changed: 9 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,47 +1517,20 @@ def _simple_simulate(
15171517
# check if tracing is enabled and if we have trace targets
15181518
have_trace_targets = state.tracing.has_trace_targets(choosers)
15191519

1520+
sharrow_enabled = state.settings.sharrow
1521+
15201522
# if tracing is not enabled, drop unused columns
15211523
# if not estimation mode, drop unused columns
15221524
if (not have_trace_targets) and (estimator is None):
1523-
1524-
# keep only variables needed for spec
1525-
import re
1526-
1527-
# define a regular expression to find variables in spec
1528-
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"
1529-
1530-
unique_variables_in_spec = set(
1531-
spec.reset_index()["Expression"]
1532-
.apply(lambda x: re.findall(pattern, x))
1533-
.sum()
1525+
# drop unused variables in chooser table
1526+
choosers = util.drop_unused_chooser_columns(
1527+
choosers,
1528+
spec,
1529+
locals_d,
1530+
custom_chooser,
1531+
sharrow_enabled=sharrow_enabled,
15341532
)
15351533

1536-
sharrow_enabled = state.settings.sharrow
1537-
1538-
# when sharrow mode, need to keep skim variables in the chooser table
1539-
if sharrow_enabled:
1540-
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
1541-
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
1542-
unique_variables_in_spec.add(locals_d.get("out_time_col_name", None))
1543-
unique_variables_in_spec.add(locals_d.get("in_time_col_name", None))
1544-
unique_variables_in_spec.add("out_period")
1545-
unique_variables_in_spec.add("in_period")
1546-
unique_variables_in_spec.add("trip_period")
1547-
1548-
if custom_chooser:
1549-
import inspect
1550-
1551-
custom_chooser_lines = inspect.getsource(custom_chooser)
1552-
unique_variables_in_spec.update(re.findall(pattern, custom_chooser_lines))
1553-
1554-
logger.info("Dropping unused variables in chooser table")
1555-
1556-
# keep only variables needed for spec
1557-
choosers = choosers[
1558-
[c for c in choosers.columns if c in unique_variables_in_spec]
1559-
]
1560-
15611534
if nest_spec is None:
15621535
choices = eval_mnl(
15631536
state,

activitysim/core/util.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,3 +638,78 @@ def zarr_file_modification_time(zarr_dir: Path):
638638
if t == 0:
639639
raise FileNotFoundError(zarr_dir)
640640
return t
641+
642+
643+
def drop_unused_chooser_columns(
    choosers,
    spec,
    locals_d,
    custom_chooser,
    sharrow_enabled=False,
    additional_columns=None,
):
    """
    Drop chooser columns that are not needed to evaluate the spec.

    Every identifier-like token found in the spec's ``Expression`` values
    (and in the source code of ``custom_chooser``, when given) is treated as
    a column that may be needed.  A handful of skim-related key columns read
    from ``locals_d`` are kept as well.  All other columns are removed.

    Parameters
    ----------
    choosers : pandas.DataFrame
        The chooser table to be trimmed.  It is not modified in place.
    spec : pandas.DataFrame
        Model spec; ``spec.reset_index()`` must provide an ``Expression``
        column holding the expressions to scan.
    locals_d : dict or None
        Locals used when evaluating the spec.  The keys read here are
        ``orig_col_name``, ``dest_col_name``, ``timeframe``, ``ORIGIN``,
        ``DESTINATION``, ``PARKING``, ``od_skims``, ``dp_skims`` and
        ``dnt_skims``.
    custom_chooser : callable or None
        Optional custom chooser function; tokens found in its source code
        are kept.
    sharrow_enabled : bool, default False
        When true, ``out_period``, ``in_period`` and ``purpose_index_num``
        are always kept.
    additional_columns : str or iterable of str, optional
        Extra column names the caller wants to keep regardless of the spec
        (for example ID columns needed downstream).

    Returns
    -------
    pandas.DataFrame
        ``choosers`` restricted to the columns that are kept, in their
        original order.  If the source of ``custom_chooser`` cannot be
        inspected, ``choosers`` is returned unchanged.
    """
    import inspect
    import re

    # regular expression matching anything that looks like a variable name
    pattern = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

    # Collect tokens expression by expression.  Summing a Series of lists is
    # quadratic and yields the integer 0 (not a list) for an empty spec.
    unique_variables_in_spec = set()
    for expression in spec.reset_index()["Expression"]:
        unique_variables_in_spec.update(pattern.findall(str(expression)))

    if locals_d:
        unique_variables_in_spec.add(locals_d.get("orig_col_name"))
        unique_variables_in_spec.add(locals_d.get("dest_col_name"))

        if locals_d.get("timeframe") == "trip":
            orig_col_name = locals_d.get("ORIGIN")
            dest_col_name = locals_d.get("DESTINATION")
            parking_col_name = locals_d.get("PARKING")
            stop_col_name = None
            primary_origin_col_name = None

            # fall back to the key columns of the skim wrappers
            if orig_col_name is None and "od_skims" in locals_d:
                orig_col_name = locals_d["od_skims"].orig_key
            if dest_col_name is None and "od_skims" in locals_d:
                dest_col_name = locals_d["od_skims"].dest_key
            if "dp_skims" in locals_d:
                stop_col_name = locals_d["dp_skims"].dest_key
            if "dnt_skims" in locals_d:
                primary_origin_col_name = locals_d["dnt_skims"].dest_key

            unique_variables_in_spec.update(
                (
                    orig_col_name,
                    dest_col_name,
                    parking_col_name,
                    primary_origin_col_name,
                    stop_col_name,
                    "trip_period",
                )
            )

        # when using trip_scheduling_choice for trip scheduling
        # NOTE(review): kept for any non-empty locals_d, not only for the
        # "trip" timeframe; this only ever keeps more columns -- confirm.
        unique_variables_in_spec.add("last_outbound_stop")
        unique_variables_in_spec.add("last_inbound_stop")

    # when sharrow mode, need to keep the following columns in the choosers table
    if sharrow_enabled:
        unique_variables_in_spec.update(
            ("out_period", "in_period", "purpose_index_num")
        )

    if additional_columns:
        if isinstance(additional_columns, str):
            # a bare string is one column name, not an iterable of characters
            unique_variables_in_spec.add(additional_columns)
        else:
            unique_variables_in_spec.update(additional_columns)

    if custom_chooser:
        try:
            custom_chooser_lines = inspect.getsource(custom_chooser)
        except (OSError, TypeError):
            # Source is unavailable (builtin, partial, dynamically created
            # callable, ...), so we cannot tell which columns it reads.
            # Dropping columns is only an optimisation: keep everything.
            logger.warning(
                "Cannot inspect source of custom chooser %r; "
                "keeping all columns in chooser table",
                custom_chooser,
            )
            return choosers
        unique_variables_in_spec.update(pattern.findall(custom_chooser_lines))

    # optional names that were not supplied show up as None
    unique_variables_in_spec.discard(None)

    logger.info("Dropping unused variables in chooser table")

    logger.info(
        "before dropping, the choosers table has %s columns: %s",
        len(choosers.columns),
        choosers.columns,
    )

    # keep only variables needed for spec
    choosers = choosers[[c for c in choosers.columns if c in unique_variables_in_spec]]

    logger.info(
        "after dropping, the choosers table has %s columns: %s",
        len(choosers.columns),
        choosers.columns,
    )

    return choosers

0 commit comments

Comments
 (0)