parser = argparse.ArgumentParser()
parser.add_argument(
-    "--config-downstream", default="bert_tpe_config_classifier",
+    "--config-downstream", default="bert_hypertuning_config_classifier",
    help="Configuration of the downstream part of the model")
parser.add_argument(
    '--pretrained-model-name', type=str, default='bert-base-uncased',
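The value of this flag is a Python module name; presumably, as in the repository's other BERT examples, it is imported dynamically and flattened into a plain dict (the loading code itself is not part of this diff). A hedged sketch of that pattern:

```python
# Hedged sketch: loading the module named by --config-downstream. The module
# must be importable (e.g. a bert_hypertuning_config_classifier.py file next
# to the script); this loading code is assumed, not shown in the diff.
import argparse
import importlib

parser = argparse.ArgumentParser()
parser.add_argument(
    "--config-downstream", default="bert_hypertuning_config_classifier",
    help="Configuration of the downstream part of the model")
args = parser.parse_args()

config_module = importlib.import_module(args.config_downstream)
# flatten module attributes into the dict that main() iterates over
config_downstream = {
    k: v for k, v in config_module.__dict__.items() if not k.startswith("__")
}
```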
class ModelWrapper(nn.Module):
+    r"""This class wraps a model (in this case a BERT classifier) and implements
+    :meth:`forward` and :meth:`predict` to conform to the requirements of the
+    :class:`Executor` class. In particular, :meth:`forward` returns a dict with
+    keys "loss" and "preds", and :meth:`predict` returns a dict with key "preds".
+
+    Args:
+        model: BERTClassifier
+            A BERTClassifier model
+    """
+
    def __init__(self, model: BERTClassifier):
        super().__init__()
        self.model = model

    def _compute_loss(self, logits, labels):
-        r"""Compute loss.
-        """
        if self.model.is_binary:
            loss = F.binary_cross_entropy(
                logits.view(-1), labels.view(-1), reduction='mean')
@@ -88,6 +96,18 @@ def _compute_loss(self, logits, labels):

    def forward(self,  # type: ignore
                batch: tx.data.Batch) -> Dict[str, torch.Tensor]:
+        r"""Run a forward pass through the network and return a dict to be
+        consumed by the :class:`Executor`. This method will be called by the
+        :class:`Executor` during training.
+
+        Args:
+            batch: tx.data.Batch
+                A batch of inputs to be passed through the network
+
+        Returns:
+            A dict with keys "loss" and "preds" containing the loss and
+            predictions on :attr:`batch` respectively.
+        """
        input_ids = batch["input_ids"]
        segment_ids = batch["segment_ids"]
        labels = batch["label_ids"]
@@ -101,6 +121,15 @@ def forward(self, # type: ignore
        return {"loss": loss, "preds": preds}

    def predict(self, batch: tx.data.Batch) -> Dict[str, torch.Tensor]:
+        r"""Predict the labels for the :attr:`batch` of examples. This method
+        will be called instead of :meth:`forward` during validation or testing,
+        if the :class:`Executor`'s :attr:`validate_mode` or :attr:`test_mode`
+        is set to ``"predict"`` instead of ``"eval"``.
+
+        Args:
+            batch: tx.data.Batch
+                A batch of inputs to run prediction on
+        """
        input_ids = batch["input_ids"]
        segment_ids = batch["segment_ids"]
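For reference, here is a minimal, self-contained sketch of the forward/predict contract the docstrings above describe, using a hypothetical toy model in place of the BERT classifier:

```python
# Minimal sketch of the Executor dict contract: forward() returns
# {"loss", "preds"} for training, predict() returns {"preds"} for
# "predict"-mode validation/testing. ToyWrapper is hypothetical.
from typing import Dict

import torch
import torch.nn as nn
import torch.nn.functional as F


class ToyWrapper(nn.Module):
    def __init__(self, num_classes: int = 2):
        super().__init__()
        self.linear = nn.Linear(8, num_classes)

    def forward(self, batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        logits = self.linear(batch["inputs"])
        loss = F.cross_entropy(logits, batch["label_ids"])
        preds = torch.argmax(logits, dim=-1)
        # "loss" drives training; "preds" feeds metrics such as
        # metric.Accuracy(pred_name="preds", label_name="label_ids")
        return {"loss": loss, "preds": preds}

    def predict(self, batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        logits = self.linear(batch["inputs"])
        return {"preds": torch.argmax(logits, dim=-1)}


# Toy usage with random data
wrapper = ToyWrapper()
batch = {"inputs": torch.randn(4, 8), "label_ids": torch.randint(0, 2, (4,))}
out = wrapper(batch)
print(out["loss"].item(), out["preds"])
```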
@@ -112,11 +141,23 @@ def predict(self, batch: tx.data.Batch) -> Dict[str, torch.Tensor]:

class TPE:
-    def __init__(self, model_config=None):
+    r""":class:`TPE` uses the Tree-structured Parzen Estimator algorithm from
+    `hyperopt` for hyperparameter tuning.
+
+    Args:
+        model_config: Dict
+            A config dict which is passed to the BERT classifier
+        output_dir: str
+            A path under which to store the models
+    """
+
+    def __init__(self, model_config: Dict, output_dir: str = "output/"):
        tx.utils.maybe_create_dir(args.output_dir)

        self.model_config = model_config
+        self.output_dir = output_dir
+
        # create datasets
        self.train_dataset = tx.data.RecordData(
            hparams=config_data.train_hparam, device=device)
@@ -150,7 +191,31 @@ def __init__(self, model_config=None):

        self.optim = tx.core.BertAdam

    def objective_func(self, params: Dict):
-
+        r"""Compute a "loss" for a given set of hyperparameter values. This
+        function is passed to hyperopt's ``fmin`` (see the :meth:`run` method)
+        and gets called repeatedly to find the best set of hyperparam values.
+        Below is an example of how to use this method:
+
+        .. code-block:: python
+
+            import hyperopt as hpo
+
+            trials = hpo.Trials()
+            hpo.fmin(fn=self.objective_func,
+                     space=space,
+                     algo=hpo.tpe.suggest,
+                     max_evals=3,
+                     trials=trials)
+
+        Args:
+            params: Dict
+                A `(key, value)` dict representing the ``value`` to try for
+                the hyperparam ``key``
+
+        Returns:
+            A dict with keys "loss", "status" and "model" indicating the loss
+            for this trial, the status, and the path to the saved model.
+        """
        print(f"Using {params} for trial {self.exp_number}")

        # Loads data
@@ -188,7 +253,7 @@ def objective_func(self, params: Dict):

        valid_metric = metric.Accuracy(pred_name="preds",
                                       label_name="label_ids")
-        checkpoint_dir = f"./models/exp{self.exp_number}"
+        checkpoint_dir = f"./{self.output_dir}/exp{self.exp_number}"

        executor = Executor(
            # supply executor with the model
@@ -232,6 +297,13 @@ def objective_func(self, params: Dict):

        }
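The closing brace above ends the trial-result dict that objective_func hands back to hyperopt. The diff truncates its contents, but the docstring names the keys; a hedged sketch of its shape, with placeholder values:

```python
# Hedged sketch of the dict objective_func returns for one trial. The metric
# value and path below are placeholders, not values taken from the diff.
from hyperopt import STATUS_OK

best_valid_accuracy = 0.87          # hypothetical validation accuracy
checkpoint_dir = "./output/exp0"    # mirrors the checkpoint_dir format above

trial_result = {
    "loss": 1.0 - best_valid_accuracy,  # hyperopt minimizes "loss"
    "status": STATUS_OK,
    "model": checkpoint_dir,
}
```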

    def run(self, hyperparams: Dict):
+        r"""Run the TPE algorithm with hyperparameters :attr:`hyperparams`.
+
+        Args:
+            hyperparams: Dict
+                The `(key, value)` pairs of hyperparameters along with their
+                range of values.
+        """
        space = {}
        for k, v in hyperparams.items():
            if isinstance(v, dict):
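The loop above (cut off by the hunk boundary) is where the range dicts from the config become a hyperopt search space. A hedged sketch of that translation, with an assumed config entry:

```python
# Hedged sketch: turning config-style range dicts into a hyperopt space.
# The "optimizer.lr" key and its bounds are assumptions for illustration.
import hyperopt as hpo

hyperparams = {"optimizer.lr": {"start": 1e-5, "end": 1e-4}}

space = {}
for k, v in hyperparams.items():
    if isinstance(v, dict):
        # sample uniformly between the configured bounds
        space[k] = hpo.hp.uniform(k, v["start"], v["end"])

print(space)
```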
@@ -258,7 +330,7 @@ def run(self, hyperparams: Dict):

def main():
    model_config = {k: v for k, v in config_downstream.items() if
                    k != "hyperparams"}
-    tpe = TPE(model_config=model_config)
+    tpe = TPE(model_config=model_config, output_dir=args.output_dir)
    hyperparams = config_downstream["hyperparams"]
    tpe.run(hyperparams)
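To see the whole loop end to end without the BERT model, here is a self-contained toy run of the same fmin/Trials pattern that TPE uses, with a quadratic objective standing in for the training trial:

```python
# Toy version of the TPE loop: toy_objective stands in for objective_func,
# and the space mirrors the kind that run() would build from the config.
import hyperopt as hpo
from hyperopt import STATUS_OK


def toy_objective(params):
    lr = params["lr"]
    # pretend the best validation loss occurs near lr = 3e-5
    return {"loss": (lr - 3e-5) ** 2, "status": STATUS_OK}


space = {"lr": hpo.hp.uniform("lr", 1e-5, 1e-4)}
trials = hpo.Trials()
best = hpo.fmin(fn=toy_objective, space=space,
                algo=hpo.tpe.suggest, max_evals=10, trials=trials)
print(best)  # e.g. {'lr': 3.02e-05}
```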