Merge pull request #4 from luozhouyang/dev
Add albert adapter
luozhouyang authored Jul 5, 2020
2 parents b519d62 + cb99ac2 commit fda24dd
Showing 12 changed files with 433 additions and 74 deletions.
72 changes: 68 additions & 4 deletions README.md
@@ -6,6 +6,21 @@

Transformer-based models implemented in tensorflow 2.x (Keras).

## Contents

- [transformers-keras](#transformers-keras)
- [Contents](#contents)
- [Installation](#installation)
- [Models](#models)
- [Transformer](#transformer)
- [BERT](#bert)
- [Pretraining a new BERT model](#pretraining-a-new-bert-model)
- [Load a pretrained BERT model](#load-a-pretrained-bert-model)
- [ALBERT](#albert)
- [Pretraining a new ALBERT model](#pretraining-a-new-albert-model)
- [Load a pretrained ALBERT model](#load-a-pretrained-albert-model)


## Installation

```bash
@@ -53,6 +68,14 @@ runner.train(train_files, epochs=10, callbacks=None)

## BERT

You can use `BERT` models in two ways:

* [Pretraining a new BERT model](#pretraining-a-new-bert-model)
* [Load a pretrained BERT model](#load-a-pretrained-bert-model)


### Pretraining a new BERT model

Use your own data to pretrain a BERT model.

```python
@@ -82,9 +105,9 @@ Tips:
> You can subclass `transformers_keras.tokenizers.BertTFRecordDatasetBuilder` to parse custom tfrecord examples as you need.
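
For orientation, here is a minimal sketch of what such a parser might look like with plain `tf.data` — the feature names, dtypes and max length below are assumptions for illustration, not the builder's actual schema:

```python
import tensorflow as tf

# Hypothetical feature spec -- the real names and shapes depend on how the
# tfrecord files were written (see bert_tfrecord_custom_generator.py).
MAX_LEN = 512
FEATURES = {
    'input_ids': tf.io.FixedLenFeature([MAX_LEN], tf.int64),
    'segment_ids': tf.io.FixedLenFeature([MAX_LEN], tf.int64),
    'input_mask': tf.io.FixedLenFeature([MAX_LEN], tf.int64),
}


def parse_example(serialized):
    # Decode one serialized tf.train.Example into a dict of dense int32 tensors.
    parsed = tf.io.parse_single_example(serialized, FEATURES)
    return {name: tf.cast(tensor, tf.int32) for name, tensor in parsed.items()}


dataset = tf.data.TFRecordDataset(['testdata/bert_custom_pretrain.tfrecord'])
dataset = dataset.map(parse_example).batch(2)
```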

### Load the pretrained model
### Load a pretrained BERT model

You can use an `Adapter` to load pretrained models.
You can use a `BertAdapter` to load pretrained models.

Here is an example.

@@ -94,8 +117,8 @@ from transformers_keras.adapters import BertAdapter
# download the pretrained model and extract it to some path
PRETRAINED_BERT_MODEL = '/path/to/chinese_L-12_H-768_A-12'

adapter = BertAdapter()
model = adapter.adapte(PRETRAINED_BERT_MODEL)
adapter = BertAdapter(strategy='chinese-bert-base')
model, vocab_file = adapter.adapte(PRETRAINED_BERT_MODEL)

print('model inputs: {}'.format(model.inputs))
print('model outputs: {}'.format(model.outputs))
@@ -109,11 +132,20 @@ model inputs: [<tf.Tensor 'input_ids:0' shape=(None, 512) dtype=int32>, <tf.Tens
model outputs: [<tf.Tensor 'predictions/Identity:0' shape=(512, 21128) dtype=float32>, <tf.Tensor 'relations/Identity:0' shape=(2,) dtype=float32>]
```
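
Once adapted, the model is an ordinary compiled Keras model. A minimal smoke-test sketch, continuing the example above (dummy all-zero/one inputs; the input order is assumed from the printed `model.inputs`):

```python
import numpy as np

# Dummy batch of one all-padding sequence, just to confirm the adapted graph
# runs end to end. Input order (input_ids, segment_ids, input_mask) is assumed.
batch_size, max_len = 1, 512
dummy_batch = [
    np.zeros((batch_size, max_len), dtype=np.int32),  # input_ids
    np.zeros((batch_size, max_len), dtype=np.int32),  # segment_ids
    np.ones((batch_size, max_len), dtype=np.int32),   # input_mask
]
predictions, relations = model.predict(dummy_batch)
print(predictions.shape, relations.shape)
```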

You can implement a custom `Strategy` to load pretrained models from anywhere.
The `transformers_keras.adapters.bert_adapter.ChineseBertBaseStrategy` is a good example.
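
For a checkpoint with a different layout, a skeletal (hypothetical, untested) sketch of such a strategy, wired through `PretrainedModelAdapter`, might look like this:

```python
from transformers_keras.adapters import AbstractStrategy, PretrainedModelAdapter


class MyCheckpointStrategy(AbstractStrategy):
    """Hypothetical strategy for a checkpoint laid out differently from the official release."""

    def mapping_config(self, pretrained_config_file):
        # Translate the checkpoint's config file into the kwargs the model expects.
        ...

    def build_model(self, model_config):
        # Build and compile the Keras model from model_config.
        ...

    def mapping_variables(self, model_config, model, ckpt):
        # Return a dict mapping model variable names to checkpoint variable names.
        ...

    def zip_weights(self, model, ckpt, variables_mapping):
        # Return a list of (model_weight, pretrained_value) pairs for batch_set_value.
        ...


# adapter = PretrainedModelAdapter(strategy=MyCheckpointStrategy())
# model, vocab_file = adapter.adapte('/path/to/custom_checkpoint_dir')
```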

Then, you can use this model to do anything you want!


## ALBERT

You can use `ALBERT` models in two ways:

* [Pretraining a new ALBERT model](#pretraining-a-new-albert-model)
* [Load a pretrained ALBERT model](#load-a-pretrained-albert-model)


### Pretraining a new ALBERT model

You should process your data into tfrecord format. Modify the script `transformers_keras/utils/bert_tfrecord_custom_generator.py` as needed.


@@ -139,3 +171,35 @@ train_files = ['testdata/bert_custom_pretrain.tfrecord']
runner.train(train_files, epochs=10, callbacks=None)

```

### Load a pretrained ALBERT model

You can use an `AlbertAdapter` to load pretrained models.

Here is an example.

```python
from transformers_keras.adapters import AlbertAdapter

# download the pretrained model and extract it to some path
PRETRAINED_ALBERT_MODEL = '/path/to/zh_albert_large'

adapter = AlbertAdapter(strategy='zh-albert-large')
model, vocab_file = adapter.adapte(PRETRAINED_ALBERT_MODEL)

print('model inputs: {}'.format(model.inputs))
print('model outputs: {}'.format(model.outputs))

```

will print:

```bash
model inputs: [<tf.Tensor 'input_ids:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'segment_ids:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'input_mask:0' shape=(None, 512) dtype=int32>]
model outputs: [<tf.Tensor 'predictions/Identity:0' shape=(None, 512, 21128) dtype=float32>, <tf.Tensor 'relations/Identity:0' shape=(None, 2) dtype=float32>]
```

You can implement a custom `Strategy` to load pretrained models from anywhere.
The `transformers_keras.adapters.albert_adapter.ChineseAlbertLargeStrategy` is a good example.

Then, you can use this model to do anything you want!
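
The returned `vocab_file` is the path to the checkpoint's vocabulary. As a quick sanity check — assuming the standard one-token-per-line vocab format — its size should match the 21128-way `predictions` dimension printed above:

```python
# Continuing the example above: a standard vocab file has one token per line,
# so its length should match the last dimension of the 'predictions' output.
with open(vocab_file, encoding='utf-8') as f:
    vocab_size = sum(1 for _ in f)
print('vocab size:', vocab_size)  # expected: 21128 for this checkpoint
```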
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@

setuptools.setup(
name="transformers_keras",
version="0.1.2",
version="0.1.3",
description="Transformer-based models implemented in tensorflow 2.x(Keras)",
long_description=long_description,
long_description_content_type="text/markdown",
50 changes: 47 additions & 3 deletions transformers_keras/__init__.py
@@ -37,7 +37,7 @@
from .runners import AlbertRunner, BertRunner, TransformerRunner

__name__ = 'transformers_keras'
__version__ = '0.1.2'
__version__ = '0.1.3'

logging.basicConfig(format="%(asctime)s %(levelname)s %(filename)15s %(lineno)4d] %(message)s", level=logging.INFO)

@@ -55,8 +55,8 @@ def build_pretraining_bert_model(model_config):
    bert = Bert4PreTraining(**model_config)
    outputs = bert(inputs)

    predictions = tf.keras.layers.Lambda(lambda x: x[0], name='predictions')(outputs[0])
    relations = tf.keras.layers.Lambda(lambda x: x[1], name='relations')(outputs[1])
    predictions = tf.keras.layers.Lambda(lambda x: x, name='predictions')(outputs[0])
    relations = tf.keras.layers.Lambda(lambda x: x, name='relations')(outputs[1])

    model = tf.keras.Model(inputs=inputs, outputs=[predictions, relations])
    lr = model_config.get('learning_rate', 3e-5)
@@ -78,3 +78,47 @@ def build_pretraining_bert_model(model_config):
        })
    model.summary()
    return model


def build_pretraining_albert_model(model_config):
    max_sequence_length = model_config.get('max_positions', 512)
    input_ids = tf.keras.layers.Input(
        shape=(max_sequence_length,), dtype=tf.int32, name='input_ids')
    input_mask = tf.keras.layers.Input(
        shape=(max_sequence_length,), dtype=tf.int32, name='input_mask')
    segment_ids = tf.keras.layers.Input(
        shape=(max_sequence_length,), dtype=tf.int32, name='segment_ids')

    inputs = (input_ids, segment_ids, input_mask)
    albert = Albert4PreTraining(**model_config)
    predictions, relations, all_states, all_attn_weights = albert(inputs=inputs)

    predictions = tf.keras.layers.Lambda(lambda x: x, name='predictions')(predictions)
    relations = tf.keras.layers.Lambda(lambda x: x, name='relations')(relations)

    model = tf.keras.Model(
        inputs=[input_ids, segment_ids, input_mask], outputs=[predictions, relations])

    lr = model_config.get('learning_rate', 3e-5)
    epsilon = model_config.get('epsilon', 1e-12)
    clipnorm = model_config.get('clipnorm', 1.0)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr, epsilon=epsilon, clipnorm=clipnorm),
        loss={
            'predictions': MaskedSparseCategoricalCrossentropy(
                mask_id=0, from_logits=True, name='pred_loss'),
            'relations': tf.keras.losses.CategoricalCrossentropy(
                from_logits=True, name='rel_loss'),
        },
        metrics={
            'predictions': [
                MaskedSparseCategoricalAccuracy(
                    mask_id=0, from_logits=False, name='pred_acc'),
            ],
            'relations': [
                tf.keras.metrics.CategoricalAccuracy(name='rel_acc'),
            ]
        })
    model.summary()
    return model
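
The identity `Lambda` wrappers above exist only to give the two outputs stable layer names, so the `loss`/`metrics` dicts passed to `compile` can address them by key. A self-contained toy sketch of the same pattern (the shapes and layers below are illustrative only, unrelated to the real BERT/ALBERT graphs):

```python
import tensorflow as tf

# Toy two-output model: identity Lambda layers only rename the outputs so that
# compile() can route losses and metrics by key ('predictions', 'relations').
inputs = tf.keras.layers.Input(shape=(4,), name='x')
hidden = tf.keras.layers.Dense(8, activation='relu')(inputs)
predictions = tf.keras.layers.Lambda(lambda t: t, name='predictions')(tf.keras.layers.Dense(5)(hidden))
relations = tf.keras.layers.Lambda(lambda t: t, name='relations')(tf.keras.layers.Dense(2)(hidden))

toy = tf.keras.Model(inputs=inputs, outputs=[predictions, relations])
toy.compile(
    optimizer='adam',
    loss={
        'predictions': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        'relations': tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    },
    metrics={
        'predictions': ['sparse_categorical_accuracy'],
        'relations': ['categorical_accuracy'],
    })
toy.summary()
```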
5 changes: 3 additions & 2 deletions transformers_keras/adapters/__init__.py
@@ -1,2 +1,3 @@
from .abstract_adapter import AbstractAdapter
from .bert_adapter import BertAdapter
from .abstract_adapter import AbstractAdapter, AbstractStrategy, PretrainedModelAdapter
from .albert_adapter import AlbertAdapter, ChineseAlbertLargeStrategy
from .bert_adapter import BertAdapter, ChineseBertBaseStrategy
94 changes: 93 additions & 1 deletion transformers_keras/adapters/abstract_adapter.py
@@ -1,9 +1,101 @@
import abc
import logging
import os

import tensorflow as tf


class AbstractStrategy(abc.ABC):
    """Pretrained model load strategy."""

    def mapping_config(self, pretrained_config_file):
        """Convert pretrained configs to model configs.
        Args:
            pretrained_config_file: File path of pretrained model's config
        Returns:
            A python dict, model config
        """
        raise NotImplementedError()

    def build_model(self, model_config):
        """Build and compile the model according to the model config.
        Args:
            model_config: A python dict, model's config
        Returns:
            A keras model, instance of `tf.keras.Model`, compiled.
        """
        raise NotImplementedError()

    def mapping_variables(self, model_config, model, ckpt):
        """Map pretrained variables to the model's variables.
        Args:
            model_config: A python dict, model's config
            model: A keras model, compiled
            ckpt: Python str, path of the pretrained checkpoint
        Returns:
            Python dict, variables' name mapping
        """
        raise NotImplementedError()

    def zip_weights(self, model, ckpt, variables_mapping):
        """Zip model weights with their pretrained values.
        Args:
            model: A keras model, compiled
            ckpt: Python str, path of the pretrained checkpoint
            variables_mapping: Python dict, variables' name mapping
        Returns:
            A list of tuples (model_weight, pretrained_weight)
        """
        raise NotImplementedError()


class AbstractAdapter(abc.ABC):

    @abc.abstractmethod
    def adapte(self, pretrain_model_dir, checkpoint, **kwargs):
    def adapte(self, pretrain_model_dir, **kwargs):
        raise NotImplementedError()

    def _parse_files(self, pretrain_model_dir):
        config_file, ckpt, vocab = None, None, None
        if not os.path.exists(pretrain_model_dir):
            logging.info('pretrain model dir: {} does not exist.'.format(pretrain_model_dir))
            return None, None, None
        for f in os.listdir(pretrain_model_dir):
            if str(f).endswith('config.json'):
                config_file = os.path.join(pretrain_model_dir, f)
            if 'vocab' in str(f):
                vocab = os.path.join(pretrain_model_dir, f)
            if 'ckpt' in str(f):
                n = '.'.join(str(f).split('.')[:-1])
                ckpt = os.path.join(pretrain_model_dir, n)
        return config_file, ckpt, vocab


class PretrainedModelAdapter(AbstractAdapter):
    """Base class of pretrained models' adapters."""

    def __init__(self, strategy: AbstractStrategy):
        """Init.
        Args:
            strategy: An instance of `AbstractStrategy`
        """
        super().__init__()
        self.strategy = strategy

    def adapte(self, pretrain_model_dir, **kwargs):
        config_file, ckpt, vocab_file = self._parse_files(pretrain_model_dir)
        model_config = self.strategy.mapping_config(config_file)
        model = self.strategy.build_model(model_config)
        names_mapping = self.strategy.mapping_variables(model_config, model, ckpt)
        weights_values = self.strategy.zip_weights(model, ckpt, names_mapping)
        tf.keras.backend.batch_set_value(weights_values)
        return model, vocab_file