PyTorch-Transformers: add possibility to pass model path to embeddings #993

Merged · 3 commits · Aug 14, 2019
67 changes: 26 additions & 41 deletions flair/embeddings.py
@@ -1170,25 +1170,22 @@ def _get_transformer_sentence_embeddings(
class TransformerXLEmbeddings(TokenEmbeddings):
def __init__(
self,
model: str = "transfo-xl-wt103",
pretrained_model_name_or_path: str = "transfo-xl-wt103",
layers: str = "1,2,3",
use_scalar_mix: bool = False,
):
"""Transformer-XL embeddings, as proposed in Dai et al., 2019.
:param model: name of Transformer-XL model
:param pretrained_model_name_or_path: name or path of Transformer-XL model
:param layers: comma-separated list of layers
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in TRANSFORMER_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided Transformer-XL model is not available.")

self.tokenizer = TransfoXLTokenizer.from_pretrained(model)
self.tokenizer = TransfoXLTokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = TransfoXLModel.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.use_scalar_mix = use_scalar_mix
self.static_embeddings = True
@@ -1231,27 +1228,24 @@ def __str__(self):
class XLNetEmbeddings(TokenEmbeddings):
def __init__(
self,
model: str = "xlnet-large-cased",
pretrained_model_name_or_path: str = "xlnet-large-cased",
layers: str = "1",
pooling_operation: str = "first_last",
use_scalar_mix: bool = False,
):
"""XLNet embeddings, as proposed in Yang et al., 2019.
:param model: name of XLNet model
:param pretrained_model_name_or_path: name or path of XLNet model
:param layers: comma-separated list of layers
:param pooling_operation: defines pooling operation for subwords
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided XLNet model is not available.")

self.tokenizer = XLNetTokenizer.from_pretrained(model)
self.tokenizer = XLNetTokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = XLNetModel.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.pooling_operation = pooling_operation
self.use_scalar_mix = use_scalar_mix
@@ -1296,28 +1290,25 @@ def __str__(self):
class XLMEmbeddings(TokenEmbeddings):
def __init__(
self,
model: str = "xlm-mlm-en-2048",
pretrained_model_name_or_path: str = "xlm-mlm-en-2048",
layers: str = "1",
pooling_operation: str = "first_last",
use_scalar_mix: bool = False,
):
"""
XLM embeddings, as proposed in Guillaume et al., 2019.
:param model: name of XLM model
:param pretrained_model_name_or_path: name or path of XLM model
:param layers: comma-separated list of layers
:param pooling_operation: defines pooling operation for subwords
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided XLM model is not available.")

self.tokenizer = XLMTokenizer.from_pretrained(model)
self.tokenizer = XLMTokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = XLMModel.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.pooling_operation = pooling_operation
self.use_scalar_mix = use_scalar_mix
@@ -1362,27 +1353,24 @@ def __str__(self):
class OpenAIGPTEmbeddings(TokenEmbeddings):
def __init__(
self,
model: str = "openai-gpt",
pretrained_model_name_or_path: str = "openai-gpt",
layers: str = "1",
pooling_operation: str = "first_last",
use_scalar_mix: bool = False,
):
"""OpenAI GPT embeddings, as proposed in Radford et al. 2018.
:param model: name of OpenAI GPT model
:param pretrained_model_name_or_path: name or path of OpenAI GPT model
:param layers: comma-separated list of layers
:param pooling_operation: defines pooling operation for subwords
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided OpenAI GPT model is not available.")

self.tokenizer = OpenAIGPTTokenizer.from_pretrained(model)
self.tokenizer = OpenAIGPTTokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = OpenAIGPTModel.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.pooling_operation = pooling_operation
self.use_scalar_mix = use_scalar_mix
@@ -1425,27 +1413,24 @@ def __str__(self):
class OpenAIGPT2Embeddings(TokenEmbeddings):
def __init__(
self,
model: str = "gpt2-medium",
pretrained_model_name_or_path: str = "gpt2-medium",
layers: str = "1",
pooling_operation: str = "first_last",
use_scalar_mix: bool = False,
):
"""OpenAI GPT-2 embeddings, as proposed in Radford et al. 2019.
:param model: name of OpenAI GPT-2 model
:param pretrained_model_name_or_path: name or path of OpenAI GPT-2 model
:param layers: comma-separated list of layers
:param pooling_operation: defines pooling operation for subwords
:param use_scalar_mix: defines the usage of scalar mix for specified layer(s)
"""
super().__init__()

if model not in OPENAI_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys():
raise ValueError("Provided OpenAI GPT-2 model is not available.")

self.tokenizer = GPT2Tokenizer.from_pretrained(model)
self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name_or_path)
self.model = GPT2Model.from_pretrained(
pretrained_model_name_or_path=model, output_hidden_states=True
pretrained_model_name_or_path=pretrained_model_name_or_path, output_hidden_states=True
)
self.name = model
self.name = pretrained_model_name_or_path
self.layers: List[int] = [int(layer) for layer in layers.split(",")]
self.pooling_operation = pooling_operation
self.use_scalar_mix = use_scalar_mix
@@ -2041,7 +2026,7 @@ def __init__(
"""
Bidirectional transformer embeddings of words, as proposed in Devlin et al., 2018.
:param bert_model_or_path: name of BERT model ('') or directory path containing custom model, configuration file
and vocab file (names of three files should be - bert_config.json, pytorch_model.bin/model.chkpt, vocab.txt)
and vocab file (names of three files should be - config.json, pytorch_model.bin/model.chkpt, vocab.txt)
:param layers: string indicating which layers to take for embedding
:param pooling_operation: how to get from token piece embeddings to token embedding. Either pool them and take
the average ('mean') or use the first word piece embedding as token embedding ('first')
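To illustrate the corrected docstring above, a `BertEmbeddings` instance can point at a local directory that contains exactly those three files. A minimal sketch; the path is only a placeholder:

```python
from flair.data import Sentence
from flair.embeddings import BertEmbeddings

# Placeholder path: the directory is expected to contain
# config.json, pytorch_model.bin (or model.chkpt) and vocab.txt.
embedding = BertEmbeddings(bert_model_or_path="/path/to/custom-bert")

sentence = Sentence("I love Berlin .")
embedding.embed(sentence)

for token in sentence:
    print(token.text, token.embedding.shape)
```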
58 changes: 29 additions & 29 deletions resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md
@@ -233,12 +233,12 @@ embedding.embed(sentence)

The `OpenAIGPTEmbeddings` class has several arguments:

| Argument | Default | Description
| ------------------- | ------------ | -------------------------------------------------
| `model` | `openai-gpt` | Defines GPT model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------ | -------------------------------------------------
| `pretrained_model_name_or_path` | `openai-gpt` | Defines name or path of GPT model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
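After this change, the first argument accepts either a model name from the table above or a filesystem path. A short sketch; the local directory is hypothetical:

```python
from flair.data import Sentence
from flair.embeddings import OpenAIGPTEmbeddings

# load the pre-trained model by name ...
embedding = OpenAIGPTEmbeddings(pretrained_model_name_or_path="openai-gpt")

# ... or point the same argument at a saved checkpoint directory
# embedding = OpenAIGPTEmbeddings(
#     pretrained_model_name_or_path="/path/to/finetuned-gpt"  # hypothetical path
# )

sentence = Sentence("Berlin and Munich have a lot of puppeteer to see .")
embedding.embed(sentence)
```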

## OpenAI GPT-2 Embeddings

@@ -262,12 +262,12 @@ embedding.embed(sentence)

The `OpenAIGPT2Embeddings` class has several arguments:

| Argument | Default | Description
| ------------------- | ------------- | -------------------------------------------------
| `model` | `gpt2-medium` | Defines GPT-2 model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------- | -------------------------------------------------
| `pretrained_model_name_or_path` | `gpt2-medium` | Defines name or path of GPT-2 model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
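As a hedged sketch, a GPT-2 checkpoint that was fine-tuned and saved locally (the path is illustrative) can also be combined with a scalar mix over several layers:

```python
from flair.data import Sentence
from flair.embeddings import OpenAIGPT2Embeddings

embedding = OpenAIGPT2Embeddings(
    pretrained_model_name_or_path="./gpt2-finetuned",  # illustrative local directory
    layers="1,2,3,4",
    pooling_operation="first_last",
    use_scalar_mix=True,
)

sentence = Sentence("The quick brown fox jumps over the lazy dog .")
embedding.embed(sentence)
```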

The following GPT-2 models can be used:

@@ -300,11 +300,11 @@ embedding.embed(sentence)

The following arguments can be passed to the `TransformerXLEmbeddings` class:

| Argument | Default | Description
| ------------------- | ------------------ | -------------------------------------------------
| `model` | `transfo-xl-wt103` | Defines Transformer-XL model
| `layers` | `1,2,3` | Defines the to be used layers of the Transformer-based model
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------------ | -------------------------------------------------
| `pretrained_model_name_or_path` | `transfo-xl-wt103` | Defines name or path of Transformer-XL model
| `layers` | `1,2,3` | Defines the to be used layers of the Transformer-based model
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)

Notice: The Transformer-XL model (trained on WikiText-103) is a word-based language model. Thus, no subword tokenization
is needed (`pooling_operation` is not needed), as shown in the sketch below.
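A minimal sketch of this: only the model (name or path) and the layers are passed, without any `pooling_operation` argument:

```python
from flair.data import Sentence
from flair.embeddings import TransformerXLEmbeddings

# word-based model: no pooling_operation argument is required
embedding = TransformerXLEmbeddings(
    pretrained_model_name_or_path="transfo-xl-wt103",
    layers="1,2,3",
)

sentence = Sentence("The grass is green .")
embedding.embed(sentence)
```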
@@ -331,12 +331,12 @@ embedding.embed(sentence)

The following arguments can be passed to the `XLNetEmbeddings` class:

| Argument | Default | Description
| ------------------- | ------------------- | -------------------------------------------------
| `model` | `xlnet-large-cased` | Defines XLNet model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------------- | -------------------------------------------------
| `pretrained_model_name_or_path` | `xlnet-large-cased` | Defines name or path of XLNet model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
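For example, several layers can be mixed and subwords pooled with the mean operation; the values below are just one possible configuration:

```python
from flair.data import Sentence
from flair.embeddings import XLNetEmbeddings

embedding = XLNetEmbeddings(
    pretrained_model_name_or_path="xlnet-large-cased",
    layers="1,2,3,4",
    pooling_operation="mean",
    use_scalar_mix=True,
)

sentence = Sentence("I love Berlin .")
embedding.embed(sentence)
```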

The following XLNet models can be used:

@@ -370,12 +370,12 @@ embedding.embed(sentence)

The following arguments can be passed to the `XLMEmbeddings` class:

| Argument | Default | Description
| ------------------- | ------------------- | -------------------------------------------------
| `model` | `xlm-mlm-en-2048` | Defines XLM model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
| Argument | Default | Description
| ------------------------------- | ------------------- | -------------------------------------------------
| `pretrained_model_name_or_path` | `xlm-mlm-en-2048` | Defines name or path of XLM model
| `layers` | `1` | Defines the to be used layers of the Transformer-based model
| `pooling_operation` | `first_last` | See [Pooling operation section](#Pooling-operation)
| `use_scalar_mix` | `False` | See [Scalar mix section](#Scalar-mix)
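The same renamed argument also works here, so an XLM model stored on disk can be used; the directory below is a placeholder:

```python
from flair.data import Sentence
from flair.embeddings import XLMEmbeddings

embedding = XLMEmbeddings(
    pretrained_model_name_or_path="/path/to/xlm-checkpoint",  # placeholder path
    layers="1",
    pooling_operation="first_last",
)

sentence = Sentence("I love Berlin .")
embedding.embed(sentence)
```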

The following XLM models can be used:

12 changes: 7 additions & 5 deletions tests/test_transformer_embeddings.py
@@ -222,7 +222,7 @@ def embed_sentence(
use_scalar_mix: bool = False,
) -> Sentence:
embeddings = OpenAIGPTEmbeddings(
model=gpt_model,
pretrained_model_name_or_path=gpt_model,
layers=layers,
pooling_operation=pooling_operation,
use_scalar_mix=use_scalar_mix,
@@ -371,7 +371,7 @@ def embed_sentence(
use_scalar_mix: bool = False,
) -> Sentence:
embeddings = OpenAIGPT2Embeddings(
model=gpt_model,
pretrained_model_name_or_path=gpt_model,
layers=layers,
pooling_operation=pooling_operation,
use_scalar_mix=use_scalar_mix,
@@ -526,7 +526,7 @@ def embed_sentence(
use_scalar_mix: bool = False,
) -> Sentence:
embeddings = XLNetEmbeddings(
model=xlnet_model,
pretrained_model_name_or_path=xlnet_model,
layers=layers,
pooling_operation=pooling_operation,
use_scalar_mix=use_scalar_mix,
@@ -674,7 +674,9 @@ def embed_sentence(
sentence: str, layers: str = "1", use_scalar_mix: bool = False
) -> Sentence:
embeddings = TransformerXLEmbeddings(
model=transfo_model, layers=layers, use_scalar_mix=use_scalar_mix
pretrained_model_name_or_path=transfo_model,
layers=layers,
use_scalar_mix=use_scalar_mix,
)
flair_sentence = Sentence(sentence)
embeddings.embed(flair_sentence)
@@ -752,7 +754,7 @@ def embed_sentence(
use_scalar_mix: bool = False,
) -> Sentence:
embeddings = XLMEmbeddings(
model=xlm_model,
pretrained_model_name_or_path=xlm_model,
layers=layers,
pooling_operation=pooling_operation,
use_scalar_mix=use_scalar_mix,
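Outside the fixture-based helpers above, the renamed keyword can also be exercised in a standalone test. This is only a sketch and is not part of the PR; the hub name stands in for a local checkpoint directory:

```python
import pytest

from flair.data import Sentence
from flair.embeddings import XLMEmbeddings


@pytest.mark.integration
def test_xlm_embeddings_accept_name_or_path():
    # a hub name is used here; a local checkpoint directory would work the same way
    embeddings = XLMEmbeddings(pretrained_model_name_or_path="xlm-mlm-en-2048")

    sentence = Sentence("I love Berlin")
    embeddings.embed(sentence)

    assert all(len(token.embedding) > 0 for token in sentence)
```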