PyTorch-Transformers: add possibility to pass model path to embeddings #993

Merged · 3 commits · Aug 14, 2019
67 changes: 26 additions & 41 deletions flair/embeddings.py
@@ -1170,25 +1170,22 @@ def _get_transformer_sentence_embeddings(
class TransformerXLEmbeddings(TokenEmbeddings):
def __init__(
self,
model: str = "transfo-xl-wt103",
pretrained_model_name_or_path: str = "transfo-xl-wt103",
layers: str = "1,2,3",
use_scalar_mix: bool = False,
):
"""Transformer-XL embeddings, as proposed in Dai et al., 2019.
:param model: name of Transformer-XL model
:param pretrained_model_name_or_path: name or path of Transformer-XL model
:param layers: comma-separated list of layers
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in TRANSFORMER_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided Transformer-XL model is not available.")

self.tokenizer = TransfoXLTokenizer.from_pretrained(model)
self.tokenizer = TransfoXLTokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = TransfoXLModel.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.use_scalar_mix = use_scalar_mix
self.static_embeddings = True
@@ -1231,27 +1228,24 @@ def __str__(self):
class XLNetEmbeddings(TokenEmbeddings):
def __init__(
self,
model: str = "xlnet-large-cased",
pretrained_model_name_or_path: str = "xlnet-large-cased",
layers: str = "1",
pooling_operation: str = "first_last",
use_scalar_mix: bool = False,
):
"""XLNet embeddings, as proposed in Yang et al., 2019.
:param model: name of XLNet model
:param pretrained_model_name_or_path: name or path of XLNet model
:param layers: comma-separated list of layers
:param pooling_operation: defines pooling operation for subwords
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided XLNet model is not available.")

self.tokenizer = XLNetTokenizer.from_pretrained(model)
self.tokenizer = XLNetTokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = XLNetModel.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.pooling_operation = pooling_operation
self.use_scalar_mix = use_scalar_mix
@@ -1296,28 +1290,25 @@ def __str__(self):
class XLMEmbeddings(TokenEmbeddings):
def __init__(
self,
model: str = "xlm-mlm-en-2048",
pretrained_model_name_or_path: str = "xlm-mlm-en-2048",
layers: str = "1",
pooling_operation: str = "first_last",
use_scalar_mix: bool = False,
):
"""
XLM embeddings, as proposed in Guillaume et al., 2019.
:param model: name of XLM model
:param pretrained_model_name_or_path: name or path of XLM model
:param layers: comma-separated list of layers
:param pooling_operation: defines pooling operation for subwords
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided XLM model is not available.")

self.tokenizer = XLMTokenizer.from_pretrained(model)
self.tokenizer = XLMTokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = XLMModel.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.pooling_operation = pooling_operation
self.use_scalar_mix = use_scalar_mix
@@ -1362,27 +1353,24 @@ def __str__(self):
class OpenAIGPTEmbeddings(TokenEmbeddings):
def __init__(
self,
model: str = "openai-gpt",
pretrained_model_name_or_path: str = "openai-gpt",
layers: str = "1",
pooling_operation: str = "first_last",
use_scalar_mix: bool = False,
):
"""OpenAI GPT embeddings, as proposed in Radford et al. 2018.
:param model: name of OpenAI GPT model
:param pretrained_model_name_or_path: name or path of OpenAI GPT model
:param layers: comma-separated list of layers
:param pooling_operation: defines pooling operation for subwords
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided OpenAI GPT model is not available.")

self.tokenizer = OpenAIGPTTokenizer.from_pretrained(model)
self.tokenizer = OpenAIGPTTokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = OpenAIGPTModel.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.pooling_operation = pooling_operation
self.use_scalar_mix = use_scalar_mix
@@ -1425,27 +1413,24 @@ def __str__(self):
class OpenAIGPT2Embeddings(TokenEmbeddings):
def __init__(
self,
model: str = "gpt2-medium",
pretrained_model_name_or_path: str = "gpt2-medium",
layers: str = "1",
pooling_operation: str = "first_last",
use_scalar_mix: bool = False,
):
"""OpenAI GPT-2 embeddings, as proposed in Radford et al. 2019.
:param model: name of OpenAI GPT-2 model
:param pretrained_model_name_or_path: name or path of OpenAI GPT-2 model
:param layers: comma-separated list of layers
:param pooling_operation: defines pooling operation for subwords
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in OPENAI_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided OpenAI GPT-2 model is not available.")

self.tokenizer = GPT2Tokenizer.from_pretrained(model)
self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = GPT2Model.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.pooling_operation = pooling_operation
self.use_scalar_mix = use_scalar_mix
@@ -2041,7 +2026,7 @@ def __init__(
"""
Bidirectional transformer embeddings of words, as proposed in Devlin et al., 2018.
:param bert_model_or_path: name of BERT model ('') or directory path containing custom model, configuration file
and vocab file (names of three files should be - bert_config.json, pytorch_model.bin/model.chkpt, vocab.txt)
and vocab file (names of three files should be - config.json, pytorch_model.bin/model.chkpt, vocab.txt)
:param layers: string indicating which layers to take for embedding
:param pooling_operation: how to get from token piece embeddings to token embedding. Either pool them and take
the average ('mean') or use the first word piece embedding as token embedding ('first')
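To illustrate the corrected docstring above, a `BertEmbeddings` instance can point at a local directory that contains exactly those three files. A minimal sketch; the path is only a placeholder:

```python
from flair.data import Sentence
from flair.embeddings import BertEmbeddings

# Placeholder path: the directory is expected to contain
# config.json, pytorch_model.bin (or model.chkpt) and vocab.txt.
embedding = BertEmbeddings(bert_model_or_path="/path/to/custom-bert")

sentence = Sentence("I love Berlin .")
embedding.embed(sentence)

for token in sentence:
    print(token.text, token.embedding.shape)
```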
58 changes: 29 additions & 29 deletions resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md
@@ -233,12 +233,12 @@ embedding.embed(sentence)

The `OpenAIGPTEmbeddings` class has several arguments:

| Argument | Default | Description
| ------------------- | ------------ | -------------------------------------------------
| `model` | `openai-gpt` | Defines GPT model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------ | -------------------------------------------------
| `pretrained_model_name_or_path` | `openai-gpt` | Defines name or path of GPT model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
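After this change, the first argument accepts either a model name from the table above or a filesystem path. A short sketch; the local directory is hypothetical:

```python
from flair.data import Sentence
from flair.embeddings import OpenAIGPTEmbeddings

# load the pre-trained model by name ...
embedding = OpenAIGPTEmbeddings(pretrained_model_name_or_path="openai-gpt")

# ... or point the same argument at a saved checkpoint directory
# embedding = OpenAIGPTEmbeddings(
#     pretrained_model_name_or_path="/path/to/finetuned-gpt"  # hypothetical path
# )

sentence = Sentence("Berlin and Munich have a lot of puppeteer to see .")
embedding.embed(sentence)
```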

## OpenAI GPT-2 Embeddings

@@ -262,12 +262,12 @@ embedding.embed(sentence)

The `OpenAIGPT2Embeddings` class has several arguments:

| Argument | Default | Description
| ------------------- | ------------- | -------------------------------------------------
| `model` | `gpt2-medium` | Defines GPT-2 model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------- | -------------------------------------------------
| `pretrained_model_name_or_path` | `gpt2-medium` | Defines name or path of GPT-2 model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
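As a hedged sketch, a GPT-2 checkpoint that was fine-tuned and saved locally (the path is illustrative) can also be combined with a scalar mix over several layers:

```python
from flair.data import Sentence
from flair.embeddings import OpenAIGPT2Embeddings

embedding = OpenAIGPT2Embeddings(
    pretrained_model_name_or_path="./gpt2-finetuned",  # illustrative local directory
    layers="1,2,3,4",
    pooling_operation="first_last",
    use_scalar_mix=True,
)

sentence = Sentence("The quick brown fox jumps over the lazy dog .")
embedding.embed(sentence)
```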

The following GPT-2 models can be used:

@@ -300,11 +300,11 @@ embedding.embed(sentence)

The following arguments can be passed to the `TransformerXLEmbeddings` class:

| Argument | Default | Description
| ------------------- | ------------------ | -------------------------------------------------
| `model` | `transfo-xl-wt103` | Defines Transformer-XL model
| `layers` | `1,2,3` | Defines the to be used layers of the Transformer-based model
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------------ | -------------------------------------------------
| `pretrained_model_name_or_path` | `transfo-xl-wt103` | Defines name or path of Transformer-XL model
| `layers` | `1,2,3` | Defines the to be used layers of the Transformer-based model
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)

Notice: The Transformer-XL model (trained on WikiText-103) is a word-based language model. Thus, no subword tokenization
is needed (`pooling_operation` is not needed), as shown in the sketch below.
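A minimal sketch of this: only the model (name or path) and the layers are passed, without any `pooling_operation` argument:

```python
from flair.data import Sentence
from flair.embeddings import TransformerXLEmbeddings

# word-based model: no pooling_operation argument is required
embedding = TransformerXLEmbeddings(
    pretrained_model_name_or_path="transfo-xl-wt103",
    layers="1,2,3",
)

sentence = Sentence("The grass is green .")
embedding.embed(sentence)
```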
@@ -331,12 +331,12 @@ embedding.embed(sentence)

The following arguments can be passed to the `XLNetEmbeddings` class:

| Argument | Default | Description
| ------------------- | ------------------- | -------------------------------------------------
| `model` | `xlnet-large-cased` | Defines XLNet model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------------- | -------------------------------------------------
| `pretrained_model_name_or_path` | `xlnet-large-cased` | Defines name or path of XLNet model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
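For example, several layers can be mixed and subwords pooled with the mean operation; the values below are just one possible configuration:

```python
from flair.data import Sentence
from flair.embeddings import XLNetEmbeddings

embedding = XLNetEmbeddings(
    pretrained_model_name_or_path="xlnet-large-cased",
    layers="1,2,3,4",
    pooling_operation="mean",
    use_scalar_mix=True,
)

sentence = Sentence("I love Berlin .")
embedding.embed(sentence)
```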

The following XLNet models can be used:

@@ -370,12 +370,12 @@ embedding.embed(sentence)

The following arguments can be passed to the `XLMEmbeddings` class:

| Argument | Default | Description
| ------------------- | ------------------- | -------------------------------------------------
| `model` | `xlm-mlm-en-2048` | Defines XLM model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------------- | -------------------------------------------------
| `pretrained_model_name_or_path` | `xlm-mlm-en-2048` | Defines name or path of XLM model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
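The same renamed argument also works here, so an XLM model stored on disk can be used; the directory below is a placeholder:

```python
from flair.data import Sentence
from flair.embeddings import XLMEmbeddings

embedding = XLMEmbeddings(
    pretrained_model_name_or_path="/path/to/xlm-checkpoint",  # placeholder path
    layers="1",
    pooling_operation="first_last",
)

sentence = Sentence("I love Berlin .")
embedding.embed(sentence)
```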

The following XLM models can be used:

12 changes: 7 additions & 5 deletions tests/test_transformer_embeddings.py
@@ -222,7 +222,7 @@ def embed_sentence(
use_scalar_mix: bool = False,
) -> Sentence:
embeddings = OpenAIGPTEmbeddings(
model=gpt_model,
pretrained_model_name_or_path=gpt_model,
layers=layers,
pooling_operation=pooling_operation,
use_scalar_mix=use_scalar_mix,
@@ -371,7 +371,7 @@ def embed_sentence(
use_scalar_mix: bool = False,
) -> Sentence:
embeddings = OpenAIGPT2Embeddings(
model=gpt_model,
pretrained_model_name_or_path=gpt_model,
layers=layers,
pooling_operation=pooling_operation,
use_scalar_mix=use_scalar_mix,
@@ -526,7 +526,7 @@ def embed_sentence(
use_scalar_mix: bool = False,
) -> Sentence:
embeddings = XLNetEmbeddings(
model=xlnet_model,
pretrained_model_name_or_path=xlnet_model,
layers=layers,
pooling_operation=pooling_operation,
use_scalar_mix=use_scalar_mix,
@@ -674,7 +674,9 @@ def embed_sentence(
sentence: str, layers: str = "1", use_scalar_mix: bool = False
) -> Sentence:
embeddings = TransformerXLEmbeddings(
model=transfo_model, layers=layers, use_scalar_mix=use_scalar_mix
pretrained_model_name_or_path=transfo_model,
layers=layers,
use_scalar_mix=use_scalar_mix,
)
flair_sentence = Sentence(sentence)
embeddings.embed(flair_sentence)
@@ -752,7 +754,7 @@ def embed_sentence(
use_scalar_mix: bool = False,
) -> Sentence:
embeddings = XLMEmbeddings(
model=xlm_model,
pretrained_model_name_or_path=xlm_model,
layers=layers,
pooling_operation=pooling_operation,
use_scalar_mix=use_scalar_mix,
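Outside the fixture-based helpers above, the renamed keyword can also be exercised in a standalone test. This is only a sketch and is not part of the PR; the hub name stands in for a local checkpoint directory:

```python
import pytest

from flair.data import Sentence
from flair.embeddings import XLMEmbeddings


@pytest.mark.integration
def test_xlm_embeddings_accept_name_or_path():
    # a hub name is used here; a local checkpoint directory would work the same way
    embeddings = XLMEmbeddings(pretrained_model_name_or_path="xlm-mlm-en-2048")

    sentence = Sentence("I love Berlin")
    embeddings.embed(sentence)

    assert all(len(token.embedding) > 0 for token in sentence)
```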