comments, a few extra items on the todo list, and a few cosmetic changes to the code

fscottfoti · fscottfoti · commit 8a311d9064d7 · 2015-02-24T14:55:53.000-08:00
diff --git a/example/README.md b/example/README.md
@@ -20,8 +20,8 @@ categories are shared?  Income categories are unique?
 
 A few overarching principles
 
-* A little discussion of NOT so object oriented - this is more like a 
-database - data is in tables, NOT in objects
+* A little discussion of "NOT so object oriented" - this is more like a 
+database - data is in standard tables, NOT in objects
 
 * The implications of this are that most of the core code is pandas and thus 
 the quality is controlled by the larger community.  We are thankful that its 
@@ -45,13 +45,19 @@ Case study of `num_under16_not_at_school` to show the inter-dependencies.
 A few questions about "best practices"
 
 * What to put into the default data sources and variable specs and what to 
-put in the examples / client-specific stuff?
+put in the example / client-specific stuff?
 
 * Want to split up injectables from variables from tables or all one big file
  so it's easier to search?
 
 * How much variable computation to put in excel versus Python
 
+* There were some hard coded limits in the original csv - (area_type < 4 and 
+distance_to_work < 3) - these are now just left in the csv spec.  Why would 
+this be different than (income_in_thousands > 50)?  I've made an effort to 
+not have such "magic numbers" in Python code.  (Elizabeth: MAX_NUM_AUTOS 
+exists now) 
+
 * Want to name or number the person types in the spec files?
 
 * Testing for client-specific code?  It's harder because outputs are "data 
diff --git a/example/models.py b/example/models.py
@@ -8,24 +8,36 @@
 import pandas as pd
 
 
+# this is the max number of cars allowable in the auto ownership model
 MAX_NUM_CARS = 5
 
 
+"""
+This part of this file is currently creating small tables to serve as
+alternatives in the various models
+"""
+
+
 @sim.table()
 def auto_alts():
     return asim.identity_matrix(["cars%d" % i for i in range(MAX_NUM_CARS)])
 
 
+@sim.table()
+def mandatory_tour_frequency_alts():
+    return asim.identity_matrix(["work1", "work2", "school1", "school2",
+                                 "work_and_school"])
+
+# these are the alternatives for the workplace choice
 @sim.table()
 def zones():
     # I grant this is a weird idiom but it helps to name the index
     return pd.DataFrame({"TAZ": np.arange(1454)+1}).set_index("TAZ")
 
 
-@sim.table()
-def mandatory_tour_frequency_alts():
-    return asim.identity_matrix(["work1", "work2", "school1", "school2",
-                                 "work_and_school"])
+"""
+Read in the omx files and create the skim objects
+"""
 
 
 @sim.injectable()
@@ -56,15 +68,22 @@ def sovpm_skim(nonmotskm_omx):
     return skim.Skim(nonmotskm_omx['DIST'], offset=-1)
 
 
+"""
+Read in the spec files and reformat as necessary
+"""
+
+
 @sim.injectable()
 def auto_ownership_spec():
     f = os.path.join('configs', "auto_ownership.csv")
+    # FIXME should read in all variables and comment out ones not used
     return asim.read_model_spec(f).head(4*26)
 
 
 @sim.injectable()
 def workplace_location_spec():
     f = os.path.join('configs', "workplace_location.csv")
+    # FIXME should read in all variables and comment out ones not used
     return asim.read_model_spec(f).head(15)
 
 
@@ -80,6 +99,11 @@ def workplace_size_spec():
     return pd.read_csv(f)
 
 
+"""
+This is a special submodel for the workplace location choice
+"""
+
+
 @sim.table()
 def workplace_size_terms(land_use, workplace_size_spec):
     """
@@ -104,6 +128,12 @@ def workplace_size_terms(land_use, workplace_size_spec):
     return new_df
 
 
+"""
+Auto ownership is a standard model which predicts how many cars a household
+with given characteristics owns
+"""
+
+
 @sim.model()
 def auto_ownership_simulate(households,
                             auto_alts,
@@ -129,6 +159,13 @@ def auto_ownership_simulate(households,
     return model_design
 
 
+"""
+The workplace location model predicts the zones in which various people will
+work.  Interestingly there's not really any supply side to this model - we
+assume there are workplaces for the people to work.
+"""
+
+
 @sim.model()
 def workplace_location_simulate(persons,
                                 households,
@@ -159,6 +196,12 @@ def workplace_location_simulate(persons,
     return model_design
 
 
+"""
+This model predicts the frequency of making mandatory trips (see the
+alternatives above) - these trips include work and school in some combination.
+"""
+
+
 @sim.model()
 def mandatory_tour_frequency(persons,
                              households,
@@ -182,6 +225,15 @@ def mandatory_tour_frequency(persons,
     return model_design
 
 
+"""
+This section contains computed columns on each table.
+"""
+
+"""
+for the land use table
+"""
+
+
 @sim.column("land_use")
 def total_households(land_use):
     return land_use.local.TOTHH
@@ -202,12 +254,17 @@ def county_id(land_use):
     return land_use.local.COUNTY
 
 
+"""
+for households
+"""
+
 # just a rename / alias
 @sim.column("households")
 def home_taz(households):
     return households.TAZ
 
 
+# map household type ids to strings
 @sim.column("households")
 def household_type(households, settings):
     return households.HHT.map(settings["household_type_map"])
@@ -235,60 +292,58 @@ def num_under16_not_at_school(persons, households):
         reindex(households.index).fillna(0)
 
 
-# TODO - this is my "placeholder" for the CDAP model ;)
+"""
+for the persons table
+"""
+# FIXME - this is my "placeholder" for the CDAP model ;)
 @sim.column("persons")
 def cdap_activity(persons):
     return pd.Series(np.random.randint(3, size=len(persons)),
                      index=persons.index).map({0: 'M', 1: 'N', 2: 'H'})
 
 
-@sim.column("persons")
-def under16_not_at_school(persons):
-    return (persons.ptype_cat.isin(["school", "preschool"]) &
-            persons.cdap_activity.isin(["N", "H"]))
-
-
-# for now, this really is just a dictionary lookup
+# convert employment categories to string descriptors
 @sim.column("persons")
 def employed_cat(persons, settings):
     return persons.pemploy.map(settings["employment_map"])
 
 
+# convert student categories to string descriptors
 @sim.column("persons")
 def student_cat(persons, settings):
     return persons.pstudent.map(settings["student_map"])
 
 
+# convert person type categories to string descriptors
 @sim.column("persons")
 def ptype_cat(persons, settings):
     return persons.ptype.map(settings["person_type_map"])
 
 
+# borrowing these definitions from the original code
 @sim.column("persons")
 def student_is_employed(persons):
-    pt = persons.ptype_cat
-    ec = persons.employed_cat
-    return (pt.isin(['university', 'driving']) &
-            ec.isin(['full', 'part']))
+    return (persons.ptype_cat.isin(['university', 'driving']) &
+            persons.employed_cat.isin(['full', 'part']))
 
 
 @sim.column("persons")
 def nonstudent_to_school(persons):
-    pt = persons.ptype_cat
-    sc = persons.student_cat
-    return (pt.isin(['full', 'part', 'nonwork', 'retired']) &
-            sc.isin(['high', 'college']))
+    return (persons.ptype_cat.isin(['full', 'part', 'nonwork', 'retired']) &
+            persons.student_cat.isin(['high', 'college']))
 
 
 @sim.column("persons")
-def distance_to_work(persons, distance_skim):
-    return pd.Series(distance_skim.get(persons.home_taz,
-                                       persons.workplace_taz),
-                     index=persons.index)
+def under16_not_at_school(persons):
+    return (persons.ptype_cat.isin(["school", "preschool"]) &
+            persons.cdap_activity.isin(["N", "H"]))
 
 
 @sim.column("persons")
 def workplace_taz(persons):
+    # FIXME this is really because we ask for ALL columns in the persons data
+    # FIXME frame - urbansim actually only asks for the columns that are used by
+    # FIXME the model specs in play at that time
     return pd.Series(1, persons.index)
 
 
@@ -304,13 +359,24 @@ def school_taz(persons):
     return persons.workplace_taz
 
 
+# this use the distance skims to compute the raw distance to work from home
+@sim.column("persons")
+def distance_to_work(persons, distance_skim):
+    return pd.Series(distance_skim.get(persons.home_taz,
+                                       persons.workplace_taz),
+                     index=persons.index)
+
+
+# same deal but to school
 @sim.column("persons")
 def distance_to_school(persons, distance_skim):
     return pd.Series(distance_skim.get(persons.home_taz,
                                        persons.school_taz),
                      index=persons.index)
 
 
+# similar but this adds the am peak travel time to the pm peak travel time in
+# the opposite direction (by car)
 @sim.column("persons")
 def roundtrip_auto_time_to_work(persons, sovam_skim, sovpm_skim):
     return pd.Series(sovam_skim.get(persons.home_taz,
@@ -320,6 +386,8 @@ def roundtrip_auto_time_to_work(persons, sovam_skim, sovpm_skim):
                      index=persons.index)
 
 
+# this adds the am peak travel time to the md peak travel time in
+# the opposite direction (by car), assuming students leave school earlier
 @sim.column("persons")
 def roundtrip_auto_time_to_school(persons, sovam_skim, sovmd_skim):
     return pd.Series(sovam_skim.get(persons.home_taz,