diff --git a/.dev/clean_models.py b/.dev/clean_models.py
new file mode 100644
index 0000000000..c9ac2acbc0
--- /dev/null
+++ b/.dev/clean_models.py
@@ -0,0 +1,125 @@
+import argparse
+import glob
+import json
+import os
+import os.path as osp
+
+import mmcv
+
+# Schedule look-up table used to automatically find the final model: maps
+# the schedule tag of a config name to the final training iteration.
+SCHEDULES_LUT = {
+ '20ki': 20000,
+ '40ki': 40000,
+ '60ki': 60000,
+ '80ki': 80000,
+ '160ki': 160000
+}
+# keys copied from a json log record into the result dict
+RESULTS_LUT = ['mIoU', 'mAcc', 'aAcc']
+
+
+def get_final_iter(config):
+    """Return the final training iteration encoded in a config name.
+
+    The schedule tag is the second-to-last ``_``-separated field of
+    ``config``; it is translated through ``SCHEDULES_LUT``.
+
+    Args:
+        config (str): Config (work dir) name.
+
+    Returns:
+        int: Final iteration number.
+
+    Raises:
+        KeyError: If the schedule tag is not in ``SCHEDULES_LUT``.
+        IndexError: If ``config`` contains no ``_``.
+    """
+    schedule_tag = config.split('_')[-2]
+    return SCHEDULES_LUT[schedule_tag]
+
+
+def get_final_results(log_json_path, iter_num):
+    """Collect the results logged for the final iteration.
+
+    Each non-blank line of the log is parsed as a json record. Records
+    without a ``mode`` key are ignored.
+
+    Args:
+        log_json_path (str): Path of the json log file.
+        iter_num (int): Final iteration number.
+
+    Returns:
+        dict | None: ``memory`` of the train record at ``iter_num`` plus
+            every key of ``RESULTS_LUT`` found in a record at ``iter_num``,
+            or None if the log holds nothing for ``iter_num``.
+    """
+    result_dict = dict()
+    with open(log_json_path, 'r') as f:
+        # iterate lazily instead of loading the whole log with readlines()
+        for line in f:
+            if not line.strip():
+                continue
+            log_line = json.loads(line)
+            if 'mode' not in log_line:
+                continue
+            if log_line.get('iter') != iter_num:
+                continue
+
+            if log_line['mode'] == 'train':
+                result_dict['memory'] = log_line['memory']
+            result_dict.update({
+                key: log_line[key]
+                for key in RESULTS_LUT if key in log_line
+            })
+    # callers compare the result against None to detect a log without
+    # results, so an empty dict must not be handed back
+    return result_dict or None
+
+
+def parse_args():
+    """Parse the command line arguments.
+
+    Returns:
+        argparse.Namespace: Namespace holding the positional arguments
+            ``root`` and ``config``.
+    """
+    parser = argparse.ArgumentParser(description='Gather benchmarked models')
+    # both arguments are positional strings; only name and help text differ
+    positionals = (
+        ('root', 'root path of benchmarked models to be gathered'),
+        ('config', 'root path of benchmarked configs to be gathered'),
+    )
+    for arg_name, help_text in positionals:
+        parser.add_argument(arg_name, type=str, help=help_text)
+    return parser.parse_args()
+
+
+def main():
+    """Delete the intermediate checkpoints of finished experiments.
+
+    A config is handled when a work dir named after the config file exists
+    under ``args.root``, the final ``iter_{N}.pth`` exists in it and its
+    json log holds results for the final iteration. For such a work dir
+    every ``.pth`` file is removed except the final checkpoint and
+    ``latest.pth``.
+    """
+    args = parse_args()
+    models_root = args.root
+    config_name = args.config
+
+    # keep the configs that have been trained in the experiments dir
+    used_configs = []
+    for raw_config in mmcv.scandir(config_name, '.py', recursive=True):
+        work_dir = osp.splitext(osp.basename(raw_config))[0]
+        if osp.exists(osp.join(models_root, work_dir)):
+            used_configs.append(work_dir)
+    print(f'Find {len(used_configs)} models to be gathered')
+
+    # find the final checkpoint and the json log of each trained config
+    model_infos = []
+    for used_config in used_configs:
+        exp_dir = osp.join(models_root, used_config)
+        final_iter = get_final_iter(used_config)
+        model_path = osp.join(exp_dir, f'iter_{final_iter}.pth')
+
+        # skip if the model is still training
+        if not osp.exists(model_path):
+            print(f'{used_config} not finished yet')
+            continue
+
+        # without a json log the run cannot be verified; leave its
+        # checkpoints untouched instead of failing on an empty glob result
+        log_json_paths = glob.glob(osp.join(exp_dir, '*.log.json'))
+        if not log_json_paths:
+            print(f'{used_config} does not have a json log')
+            continue
+        log_json_path = log_json_paths[0]
+        model_performance = get_final_results(log_json_path, final_iter)
+
+        # truthiness covers both None and an empty result dict
+        if not model_performance:
+            print(f'{used_config} does not have performance')
+            continue
+
+        log_json_name = osp.basename(log_json_path)
+        model_infos.append(
+            dict(
+                config=used_config,
+                results=model_performance,
+                iters=final_iter,
+                model_time=log_json_name.split('.')[0],
+                log_json_path=log_json_name))
+
+    # remove every checkpoint except the final one and latest.pth
+    for model in model_infos:
+        exp_dir = osp.join(models_root, model['config'])
+        keep_suffixes = (f"iter_{model['iters']}.pth", 'latest.pth')
+        # take the listing first: files are deleted from the scanned dir
+        for checkpoint in list(mmcv.scandir(exp_dir, suffix='.pth')):
+            if checkpoint.endswith(keep_suffixes):
+                continue
+            checkpoint_path = osp.join(exp_dir, checkpoint)
+            print('removing {}'.format(checkpoint_path))
+            os.remove(checkpoint_path)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/.dev/gather_models.py b/.dev/gather_models.py
new file mode 100644
index 0000000000..1899195d7d
--- /dev/null
+++ b/.dev/gather_models.py
@@ -0,0 +1,197 @@
+import argparse
+import glob
+import json
+import os
+import os.path as osp
+import shutil
+import subprocess
+
+import mmcv
+import torch
+
+# metric keys copied from a json log record into the gathered results
+RESULTS_LUT = ['mIoU', 'mAcc', 'aAcc']
+
+
+def process_checkpoint(in_file, out_file):
+    """Strip a checkpoint for publishing and tag it with its hash.
+
+    The checkpoint is loaded on CPU, its ``optimizer`` entry is dropped and
+    the result is saved to ``out_file``, which is then renamed so that the
+    name carries the first 8 characters of the file's sha256 sum.
+
+    Args:
+        in_file (str): Path of the trained checkpoint.
+        out_file (str): Path the stripped checkpoint is first saved to.
+
+    Returns:
+        str: Path of the published file: ``out_file`` without a trailing
+            ``.pth``, followed by ``-{sha256[:8]}.pth``.
+
+    Raises:
+        subprocess.CalledProcessError: If ``sha256sum`` fails.
+    """
+    checkpoint = torch.load(in_file, map_location='cpu')
+    # remove optimizer for smaller file size
+    checkpoint.pop('optimizer', None)
+    # if it is necessary to remove some sensitive data in checkpoint['meta'],
+    # add the code here.
+    torch.save(checkpoint, out_file)
+    sha = subprocess.check_output(['sha256sum', out_file]).decode()
+    # str.rstrip('.pth') strips any trailing '.', 'p', 't', 'h' characters
+    # rather than the suffix, so cut the extension explicitly
+    stem = out_file[:-len('.pth')] if out_file.endswith('.pth') else out_file
+    final_file = stem + '-{}.pth'.format(sha[:8])
+    # rename synchronously: a detached `mv` process may not have finished
+    # when the caller uses the returned path
+    os.rename(out_file, final_file)
+    return final_file
+
+
+def get_final_iter(config):
+    """Return the final training iteration encoded in a config name.
+
+    The schedule tag is the second-to-last ``_``-separated field of
+    ``config`` and has the form ``<N>k``.
+
+    Args:
+        config (str): Config (work dir) name.
+
+    Returns:
+        int: ``N * 1000``.
+    """
+    schedule_tag = config.split('_')[-2]
+    assert schedule_tag.endswith('k')
+    thousands = int(schedule_tag[:-1])
+    return thousands * 1000
+
+
+def get_final_results(log_json_path, iter_num):
+    """Collect the results logged for the final iteration.
+
+    Each non-blank line of the log is parsed as a json record. Records
+    without a ``mode`` key are ignored.
+
+    Args:
+        log_json_path (str): Path of the json log file.
+        iter_num (int): Final iteration number.
+
+    Returns:
+        dict | None: ``memory`` of the train record at ``iter_num`` plus
+            every key of ``RESULTS_LUT`` found in a record at ``iter_num``,
+            or None if the log holds nothing for ``iter_num``.
+    """
+    result_dict = dict()
+    with open(log_json_path, 'r') as f:
+        # iterate lazily instead of loading the whole log with readlines()
+        for line in f:
+            if not line.strip():
+                continue
+            log_line = json.loads(line)
+            if 'mode' not in log_line:
+                continue
+            if log_line.get('iter') != iter_num:
+                continue
+
+            if log_line['mode'] == 'train':
+                result_dict['memory'] = log_line['memory']
+            result_dict.update({
+                key: log_line[key]
+                for key in RESULTS_LUT if key in log_line
+            })
+    # callers compare the result against None to pick the log that holds
+    # the final results, so an empty dict must not be handed back
+    return result_dict or None
+
+
+def parse_args():
+    """Parse the command line arguments.
+
+    Returns:
+        argparse.Namespace: Namespace holding the positional arguments
+            ``root``, ``config``, ``out_dir`` and ``out_file`` and the
+            options ``filter`` (list of str) and ``all`` (bool).
+    """
+    parser = argparse.ArgumentParser(description='Gather benchmarked models')
+    # the positional arguments are all strings; only name and help differ
+    positionals = (
+        ('root', 'root path of benchmarked models to be gathered'),
+        ('config', 'root path of benchmarked configs to be gathered'),
+        ('out_dir', 'output path of gathered models to be stored'),
+        ('out_file', 'the output json file name'),
+    )
+    for arg_name, help_text in positionals:
+        parser.add_argument(arg_name, type=str, help=help_text)
+    parser.add_argument(
+        '--filter', type=str, nargs='+', default=[], help='config filter')
+    parser.add_argument(
+        '--all', action='store_true', help='whether include .py and .log')
+    return parser.parse_args()
+
+
+def main():
+    """Gather the final checkpoints and logs of finished experiments.
+
+    A config is gathered when a work dir named after the config file exists
+    under ``args.root``, the final ``iter_{N}.pth`` exists in it and one of
+    its json logs holds results for the final iteration. The processed
+    checkpoint and the json log (with ``--all`` also the text log and the
+    dumped config) are placed under ``args.out_dir``, and the collected
+    model infos are dumped to ``args.out_file`` inside ``args.out_dir``.
+    """
+    args = parse_args()
+    models_root = args.root
+    models_out = args.out_dir
+    config_name = args.config
+    mmcv.mkdir_or_exist(models_out)
+
+    # keep the configs that have been trained in the experiments dir,
+    # together with their path relative to the config root
+    used_configs = []
+    for raw_config in mmcv.scandir(config_name, '.py', recursive=True):
+        work_dir = osp.splitext(osp.basename(raw_config))[0]
+        if osp.exists(osp.join(models_root, work_dir)):
+            used_configs.append((work_dir, raw_config))
+    print(f'Find {len(used_configs)} models to be gathered')
+
+    # find the final checkpoint and the log file of each trained config
+    model_infos = []
+    for used_config, raw_config in used_configs:
+        # an empty --filter selects every config; otherwise the config name
+        # must contain at least one of the given patterns
+        if args.filter and not any(p in used_config for p in args.filter):
+            continue
+        exp_dir = osp.join(models_root, used_config)
+        final_iter = get_final_iter(used_config)
+        model_path = osp.join(exp_dir, f'iter_{final_iter}.pth')
+
+        # skip if the model is still training
+        if not osp.exists(model_path):
+            print(f'{used_config} train not finished yet')
+            continue
+
+        # use the first json log that holds results of the final iteration
+        log_json_path = None
+        model_performance = None
+        for candidate in glob.glob(osp.join(exp_dir, '*.log.json')):
+            model_performance = get_final_results(candidate, final_iter)
+            # truthiness covers both None and an empty result dict
+            if model_performance:
+                log_json_path = candidate
+                break
+
+        if log_json_path is None:
+            print(f'{used_config} model_performance is None')
+            continue
+
+        log_json_name = osp.basename(log_json_path)
+        model_infos.append(
+            dict(
+                config=used_config,
+                raw_config=raw_config,
+                results=model_performance,
+                iters=final_iter,
+                model_time=log_json_name.split('.')[0],
+                log_json_path=log_json_name))
+
+    # publish model for each checkpoint
+    publish_model_infos = []
+    for model in model_infos:
+        exp_dir = osp.join(models_root, model['config'])
+        # splitext removes exactly the extension; str.rstrip('.py') would
+        # also eat trailing '.', 'p' and 'y' characters of the name itself
+        model_publish_dir = osp.join(models_out,
+                                     osp.splitext(model['raw_config'])[0])
+        model_name = osp.split(model['config'])[-1].split('.')[0]
+
+        publish_model_path = osp.join(model_publish_dir,
+                                      model_name + '_' + model['model_time'])
+        trained_model_path = osp.join(exp_dir,
+                                      'iter_{}.pth'.format(model['iters']))
+        if osp.exists(model_publish_dir):
+            # already published: reuse the first checkpoint found there
+            for file_name in os.listdir(model_publish_dir):
+                if file_name.endswith('.pth'):
+                    print(f'model {file_name} found')
+                    model['model_path'] = osp.abspath(
+                        osp.join(model_publish_dir, file_name))
+                    break
+            if 'model_path' not in model:
+                print(f'dir {model_publish_dir} exists, no model found')
+
+        else:
+            mmcv.mkdir_or_exist(model_publish_dir)
+
+            # convert model
+            final_model_path = process_checkpoint(trained_model_path,
+                                                  publish_model_path)
+            model['model_path'] = final_model_path
+
+        # NOTE(review): the logs and the config are assumed to be copied
+        # for every model, not only for freshly converted ones — confirm
+        # against the intended nesting of this section
+        new_json_path = f'{model_name}-{model["log_json_path"]}'
+        # copy log
+        shutil.copy(
+            osp.join(exp_dir, model['log_json_path']),
+            osp.join(model_publish_dir, new_json_path))
+        if args.all:
+            # the text log is named like the json log without '.json'
+            shutil.copy(
+                osp.join(exp_dir, osp.splitext(model['log_json_path'])[0]),
+                osp.join(model_publish_dir, osp.splitext(new_json_path)[0]))
+
+            # copy config to guarantee reproducibility
+            raw_config = osp.join(config_name, model['raw_config'])
+            mmcv.Config.fromfile(raw_config).dump(
+                osp.join(model_publish_dir, osp.basename(raw_config)))
+
+        publish_model_infos.append(model)
+
+    models = dict(models=publish_model_infos)
+    mmcv.dump(models, osp.join(models_out, args.out_file))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/.dev/generate_table.py b/.dev/generate_table.py
new file mode 100644
index 0000000000..25142caee0
--- /dev/null
+++ b/.dev/generate_table.py
@@ -0,0 +1,152 @@
+import argparse
+import csv
+import glob
+import json
+import os.path as osp
+from collections import OrderedDict
+
+import mmcv
+
+# metric keys copied from a json log record into the table
+RESULTS_LUT = ['mIoU', 'mAcc', 'aAcc']
+
+
+def get_final_iter(config):
+    """Return the final training iteration encoded in a config name.
+
+    The schedule tag is the second-to-last ``_``-separated field of
+    ``config`` and has the form ``<N>ki``.
+
+    Args:
+        config (str): Config (work dir) name.
+
+    Returns:
+        int: ``N * 1000``.
+    """
+    schedule_tag = config.split('_')[-2]
+    assert schedule_tag.endswith('ki')
+    thousands = int(schedule_tag[:-2])
+    return thousands * 1000
+
+
+def get_final_results(log_json_path, iter_num):
+    """Collect the results logged for the final iteration, in percent.
+
+    Each non-blank line of the log is parsed as a json record. Records
+    without a ``mode`` key are ignored.
+
+    Args:
+        log_json_path (str): Path of the json log file.
+        iter_num (int): Final iteration number.
+
+    Returns:
+        dict | None: ``memory`` of the train record at ``iter_num - 50``
+            plus every key of ``RESULTS_LUT`` found in a record at
+            ``iter_num`` (multiplied by 100), or None if nothing was found.
+    """
+    result_dict = dict()
+    with open(log_json_path, 'r') as f:
+        # iterate lazily instead of loading the whole log with readlines()
+        for line in f:
+            if not line.strip():
+                continue
+            log_line = json.loads(line)
+            if 'mode' not in log_line:
+                continue
+
+            cur_iter = log_line.get('iter')
+            # NOTE(review): memory is read 50 iterations before the final
+            # one — presumably the last train record written before the
+            # final evaluation; confirm against the logging interval
+            if log_line['mode'] == 'train' and cur_iter == iter_num - 50:
+                result_dict['memory'] = log_line['memory']
+
+            if cur_iter == iter_num:
+                result_dict.update({
+                    key: log_line[key] * 100
+                    for key in RESULTS_LUT if key in log_line
+                })
+    # the caller skips a model whose result is None, so an empty dict must
+    # not be handed back
+    return result_dict or None
+
+
+def get_total_time(log_json_path, iter_num):
+    """Estimate the total training time from a json log.
+
+    Every train record contributes ``(iter - previous train iter) * time``
+    seconds, where ``iter`` and ``time`` are read from the record itself.
+    Records without a ``mode`` key or with another mode are ignored.
+
+    Args:
+        log_json_path (str): Path of the json log file.
+        iter_num (int): Final iteration number. Not used by the
+            computation; kept for the callers.
+
+    Returns:
+        dict: ``{'time': 'H:MM:SS'}``.
+    """
+
+    def convert(seconds):
+        """Format a whole number of seconds as ``H:MM:SS``."""
+        minutes, seconds = divmod(seconds, 60)
+        hour, minutes = divmod(minutes, 60)
+        # zero-pad minutes and seconds; a plain width of 2 pads with spaces
+        # and renders e.g. '1: 2: 3'
+        return f'{hour:d}:{minutes:02d}:{seconds:02d}'
+
+    last_iter = 0
+    total_sec = 0
+    with open(log_json_path, 'r') as f:
+        for line in f:
+            if not line.strip():
+                continue
+            log_line = json.loads(line)
+            if log_line.get('mode') != 'train':
+                continue
+            cur_iter = log_line['iter']
+            total_sec += (cur_iter - last_iter) * log_line['time']
+            last_iter = cur_iter
+    # NOTE(review): a log without train records yields '0:00:00' here —
+    # confirm that this matches the intended nesting of the original
+    return dict(time=convert(int(total_sec)))
+
+
+def parse_args():
+    """Parse the command line arguments.
+
+    Returns:
+        argparse.Namespace: Namespace holding the positional arguments
+            ``root``, ``config`` and ``out``.
+    """
+    parser = argparse.ArgumentParser(description='Gather benchmarked models')
+    # the arguments are all positional strings; only name and help differ
+    positionals = (
+        ('root', 'root path of benchmarked models to be gathered'),
+        ('config', 'root path of benchmarked configs to be gathered'),
+        ('out', 'output path of gathered models to be stored'),
+    )
+    for arg_name, help_text in positionals:
+        parser.add_argument(arg_name, type=str, help=help_text)
+    return parser.parse_args()
+
+
+def main():
+    """Write a tab-separated table of the finished experiments.
+
+    A config contributes a row when a work dir named after the config file
+    exists under ``args.root``, the final ``iter_{N}.pth`` exists in it and
+    its json log holds results for the final iteration. The table is
+    written to ``models_table.csv`` inside ``args.out``.
+    """
+    args = parse_args()
+    models_root = args.root
+    models_out = args.out
+    config_name = args.config
+    mmcv.mkdir_or_exist(models_out)
+
+    # keep the configs that have been trained in the experiments dir
+    exp_dirs = []
+    for raw_config in mmcv.scandir(config_name, '.py', recursive=True):
+        work_dir = osp.splitext(osp.basename(raw_config))[0]
+        if osp.exists(osp.join(models_root, work_dir)):
+            exp_dirs.append(work_dir)
+    print(f'Find {len(exp_dirs)} models to be gathered')
+
+    # find the final checkpoint and the log file of each trained config
+    model_infos = []
+    for work_dir in exp_dirs:
+        exp_dir = osp.join(models_root, work_dir)
+        final_iter = get_final_iter(work_dir)
+        model_path = osp.join(exp_dir, f'iter_{final_iter}.pth')
+
+        # skip if the model is still training
+        if not osp.exists(model_path):
+            print(f'{model_path} not finished yet')
+            continue
+
+        # a finished run without a json log has nothing to report; skip it
+        # instead of failing on an empty glob result
+        log_json_paths = glob.glob(osp.join(exp_dir, '*.log.json'))
+        if not log_json_paths:
+            print(f'{exp_dir} does not have a json log')
+            continue
+        log_json_path = log_json_paths[0]
+        model_performance = get_final_results(log_json_path, final_iter)
+
+        # truthiness covers both None and an empty result dict
+        if not model_performance:
+            continue
+
+        # the work dir name is split on '_' and read by position
+        name_fields = work_dir.split('_')
+        model_info = OrderedDict(
+            head=name_fields[0],
+            backbone=name_fields[1],
+            crop_size=name_fields[-3],
+            dataset=name_fields[-1],
+            iters=f'{final_iter//1000}ki')
+        model_info.update(model_performance)
+        model_info.update(get_total_time(log_json_path, final_iter))
+        model_info['config'] = work_dir
+        model_infos.append(model_info)
+
+    # the header row is taken from the first model, so there is nothing to
+    # write without at least one
+    if not model_infos:
+        print('No finished model found, models_table.csv is not written')
+        return
+
+    table_path = osp.join(models_out, 'models_table.csv')
+    with open(table_path, 'w', newline='') as csvfile:
+        writer = csv.writer(
+            csvfile, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL)
+        writer.writerow(model_infos[0].keys())
+        for model_info in model_infos:
+            writer.writerow(model_info.values())
+
+
+if __name__ == '__main__':
+ main()
diff --git a/.dev/modelzoo_json2md.py b/.dev/modelzoo_json2md.py
new file mode 100644
index 0000000000..7cb44bffa2
--- /dev/null
+++ b/.dev/modelzoo_json2md.py
@@ -0,0 +1,58 @@
+import argparse
+import os
+import os.path as osp
+
+import mmcv
+from pytablewriter import Align, MarkdownTableWriter
+
+
+def parse_args():
+    """Parse the command line arguments.
+
+    Returns:
+        argparse.Namespace: Namespace holding the positional arguments
+            ``table_cache`` and ``out``.
+    """
+    parser = argparse.ArgumentParser(description='Gather benchmarked models')
+    # both arguments are positional strings; only name and help text differ
+    positionals = (
+        ('table_cache', 'table_cache input'),
+        ('out', 'output path md'),
+    )
+    for arg_name, help_text in positionals:
+        parser.add_argument(arg_name, type=str, help=help_text)
+    return parser.parse_args()
+
+
+def main():
+    """Convert the cached model zoo tables into per-directory README files.
+
+    The table cache loaded from ``args.table_cache`` is indexed first by
+    directory and then by dataset; the first element of each entry is the
+    list of table rows. For each directory every dataset table is dumped
+    as markdown to a temporary ``README_{dataset}.md``, and these files
+    are then concatenated into ``README.md`` and removed.
+    """
+    args = parse_args()
+    table_cache = mmcv.load(args.table_cache)
+    output_dir = args.out
+
+    writer = MarkdownTableWriter()
+    writer.headers = [
+        'Method', 'Backbone', 'Crop Size', 'Lr schd', 'Mem (GB)',
+        'Inf time (fps)', 'mIoU', 'mIoU(ms+flip)', 'download'
+    ]
+    writer.margin = 1
+    writer.align_list = [Align.CENTER] * len(writer.headers)
+    dataset_maps = {
+        'cityscapes': 'Cityscapes',
+        'ade20k': 'ADE20K',
+        'voc12aug': 'Pascal VOC 2012 + Aug'
+    }
+    for directory in table_cache:
+        directory_out = osp.join(output_dir, directory)
+        # README.md is written even when the directory has no table
+        mmcv.mkdir_or_exist(directory_out)
+        for dataset in table_cache[directory]:
+            table = table_cache[directory][dataset][0]
+            # fourth column from the end — presumably 'Inf time (fps)'
+            # given the header order; '-' marks a missing value
+            for row in table:
+                if row[-4] != '-':
+                    row[-4] = f'{row[-4]:.2f}'
+            writer.table_name = dataset_maps[dataset]
+            writer.value_matrix = table
+            writer.dump(osp.join(directory_out, f'README_{dataset}.md'))
+        with open(osp.join(directory_out, 'README.md'), 'w') as dst_f:
+            # follow the order of dataset_maps, but only merge the tables
+            # dumped above: a directory need not provide every dataset
+            for dataset in dataset_maps:
+                if dataset not in table_cache[directory]:
+                    continue
+                dataset_md_file = osp.join(directory_out,
+                                           f'README_{dataset}.md')
+                with open(dataset_md_file) as src_f:
+                    for line in src_f:
+                        dst_f.write(line)
+                    dst_f.write('\n')
+                os.remove(dataset_md_file)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000..efd4305798
--- /dev/null
+++ b/.github/CODE_OF_CONDUCT.md
@@ -0,0 +1,76 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+ advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+ address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at chenkaidev@gmail.com. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000000..6ffa7b2e64
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,53 @@
+# Contributing to mmsegmentation
+
+All kinds of contributions are welcome, including but not limited to the following.
+
+- Fixes (typo, bugs)
+- New features and components
+
+## Workflow
+
+1. fork and pull the latest mmsegmentation
+2. checkout a new branch (do not use master branch for PRs)
+3. commit your changes
+4. create a PR
+
+Note
+- If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first.
+- If you are the author of some papers and would like to include your method to mmsegmentation,
+please contact Kai Chen (chenkaidev[at]gmail[dot]com). We would much appreciate your contribution.
+
+## Code style
+
+### Python
+We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
+
+We use the following tools for linting and formatting:
+- [flake8](http://flake8.pycqa.org/en/latest/): linter
+- [yapf](https://github.com/google/yapf): formatter
+- [isort](https://github.com/timothycrosley/isort): sort imports
+
+Style configurations of yapf and isort can be found in [.style.yapf](../.style.yapf) and [.isort.cfg](../.isort.cfg).
+
+We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`,
+ fixes `end-of-files`, sorts `requirements.txt` automatically on every commit.
+The config for a pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml).
+
+After you clone the repository, you will need to install and initialize the pre-commit hook.
+
+```
+pip install -U pre-commit
+```
+
+From the repository folder
+```
+pre-commit install
+```
+
+After this, the code linters and formatter will be enforced on every commit.
+
+
+>Before you create a PR, make sure that your code lints and is formatted by yapf.
+
+### C++ and CUDA
+We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000..3ba13e0cec
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1 @@
+blank_issues_enabled: false
diff --git a/.github/ISSUE_TEMPLATE/error-report.md b/.github/ISSUE_TEMPLATE/error-report.md
new file mode 100644
index 0000000000..1b129c1574
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/error-report.md
@@ -0,0 +1,41 @@
+---
+name: Error report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+Thanks for your error report and we appreciate it a lot.
+
+**Checklist**
+1. I have searched related issues but cannot get the expected help.
+2. The bug has not been fixed in the latest version.
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**Reproduction**
+1. What command or script did you run?
+```
+A placeholder for the command.
+```
+2. Did you make any modifications on the code or config? Did you understand what you have modified?
+3. What dataset did you use?
+
+**Environment**
+
+1. Please run `python mmseg/utils/collect_env.py` to collect necessary environment information and paste it here.
+2. You may add additional information that may be helpful for locating the problem, such as
+ - How you installed PyTorch [e.g., pip, conda, source]
+ - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
+
+**Error traceback**
+If applicable, paste the error traceback here.
+```
+A placeholder for the traceback.
+```
+
+**Bug fix**
+If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000..33f9d5f235
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,22 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the feature**
+
+**Motivation**
+A clear and concise description of the motivation of the feature.
+Ex1. It is inconvenient when [....].
+Ex2. There is a recent paper [....], which is very helpful for [....].
+
+**Related resources**
+If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
+If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
diff --git a/.github/ISSUE_TEMPLATE/general_questions.md b/.github/ISSUE_TEMPLATE/general_questions.md
new file mode 100644
index 0000000000..b5a6451a6c
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/general_questions.md
@@ -0,0 +1,8 @@
+---
+name: General questions
+about: Ask general questions to get help
+title: ''
+labels: ''
+assignees: ''
+
+---
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000000..68afd6e5ec
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,101 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: build
+
+on: [push, pull_request]
+
+jobs:
+
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python 3.7
+ uses: actions/setup-python@v1
+ with:
+ python-version: 3.7
+ - name: Install linting dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install flake8 isort==4.3.21 yapf interrogate
+ - name: Lint with flake8
+ run: flake8 .
+ - name: Lint with isort
+ run: isort --recursive --check-only --diff mmseg/ tests/ examples/
+ - name: Format python codes with yapf
+ run: yapf -r -d mmseg/ tests/ examples/
+ - name: Check docstring
+ run: interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --exclude mmseg/ops --ignore-regex "__repr__" --fail-under 80 mmseg
+
+ build:
+ env:
+ CUDA: 10.1.105-1
+ CUDA_SHORT: 10.1
+ UBUNTU_VERSION: ubuntu1804
+ FORCE_CUDA: 1
+ MMCV_CUDA_ARGS: -gencode=arch=compute_61,code=sm_61
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: [3.6, 3.7]
+ torch: [1.3.0+cpu, 1.5.0+cpu]
+ include:
+ - torch: 1.3.0+cpu
+ torchvision: 0.4.2+cpu
+ - torch: 1.5.0+cpu
+ torchvision: 0.6.0+cpu
+ - torch: 1.5.0+cpu
+ torchvision: 0.6.0+cpu
+ python-version: 3.8
+ - torch: 1.5.0+cu101
+ torchvision: 0.6.0+cu101
+ python-version: 3.7
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install CUDA
+ if: ${{matrix.torch == '1.5.0+cu101'}}
+ run: |
+ export INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
+ wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
+ sudo dpkg -i ${INSTALLER}
+ wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
+ sudo apt-key add 7fa2af80.pub
+ sudo apt update -qq
+ sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
+ sudo apt clean
+ export CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
+ export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH}
+ export PATH=${CUDA_HOME}/bin:${PATH}
+ sudo apt-get install -y ninja-build
+ - name: Install Pillow
+ if: ${{matrix.torchvision == '0.4.2+cpu'}}
+ run: pip install Pillow==6.2.2
+ - name: Install PyTorch
+ run: pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
+ - name: Install mmseg dependencies
+ run: |
+ pip install mmcv-full==latest+torch${{matrix.torch}} -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html
+ pip install -r requirements.txt
+ - name: Build and install
+ run: rm -rf .eggs && pip install -e .
+ - name: Run unittests and generate coverage report
+ run: |
+ coverage run --branch --source mmseg -m pytest tests/
+ coverage xml
+ coverage report -m --omit="mmseg/utils/*","mmseg/apis/*"
+ # Only upload coverage report for python3.7 && pytorch1.5
+ - name: Upload coverage to Codecov
+ if: ${{matrix.torch == '1.5.0+cu101' && matrix.python-version == '3.7'}}
+ uses: codecov/codecov-action@v1.0.10
+ with:
+ file: ./coverage.xml
+ flags: unittests
+ env_vars: OS,PYTHON
+ name: codecov-umbrella
+ fail_ci_if_error: false
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..77824a97a7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,118 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+mmseg/version.py
+data
+.vscode
+.idea
+
+# custom
+*.pkl
+*.pkl.json
+*.log.json
+work_dirs/
+
+# Pytorch
+*.pth
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000..9e6d30895b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,35 @@
+repos:
+ - repo: https://gitlab.com/pycqa/flake8.git
+ rev: 3.8.3
+ hooks:
+ - id: flake8
+ - repo: https://github.com/asottile/seed-isort-config
+ rev: v2.2.0
+ hooks:
+ - id: seed-isort-config
+ - repo: https://github.com/timothycrosley/isort
+ rev: 4.3.21
+ hooks:
+ - id: isort
+ - repo: https://github.com/pre-commit/mirrors-yapf
+ rev: v0.30.0
+ hooks:
+ - id: yapf
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v3.1.0
+ hooks:
+ - id: trailing-whitespace
+ - id: check-yaml
+ - id: end-of-file-fixer
+ - id: requirements-txt-fixer
+ - id: double-quote-string-fixer
+ - id: check-merge-conflict
+ - id: fix-encoding-pragma
+ args: ["--remove"]
+ - id: mixed-line-ending
+ args: ["--fix=lf"]
+ - repo: https://github.com/myint/docformatter
+ rev: v1.3.1
+ hooks:
+ - id: docformatter
+ args: ["--in-place", "--wrap-descriptions", "79"]
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000000..286a3f1d7a
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,4 @@
+[style]
+BASED_ON_STYLE = pep8
+BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
+SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
diff --git a/LICENSE b/LICENSE
index 080fc4ea8e..38e625bf59 100644
--- a/LICENSE
+++ b/LICENSE
@@ -200,4 +200,4 @@ Copyright 2020 The MMSegmentation Authors. All rights reserved.
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
- limitations under the License.
\ No newline at end of file
+ limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..9e7cf39b3f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,96 @@
+
+
+
+
+## Introduction
+
+MMSegmentation is an open source semantic segmentation toolbox based on PyTorch.
+It is a part of the OpenMMLab project.
+
+The master branch works with **PyTorch 1.3 to 1.5**.
+
+
+
+### Major features
+
+- **Unified Benchmark**
+
+ We provide a unified benchmark toolbox for various semantic segmentation methods.
+
+- **Modular Design**
+
+ We decompose the semantic segmentation framework into different components and one can easily construct a customized semantic segmentation framework by combining different modules.
+
+- **Support of multiple methods out of the box**
+
+ The toolbox directly supports popular and contemporary semantic segmentation frameworks, *e.g.* PSPNet, DeepLabV3, PSANet, DeepLabV3+, etc.
+
+- **High efficiency**
+
+ The training speed is faster than or comparable to other codebases.
+
+## License
+
+This project is released under the [Apache 2.0 license](LICENSE).
+
+## Benchmark and model zoo
+
+Results and models are available in the [model zoo](docs/model_zoo.md).
+
+Supported backbones:
+- [x] ResNet
+- [x] ResNeXt
+- [x] HRNet
+
+Supported methods:
+- [x] [FCN](configs/fcn)
+- [x] [PSPNet](configs/pspnet)
+- [x] [DeepLabV3](configs/deeplabv3)
+- [x] [PSANet](configs/psanet)
+- [x] [DeepLabV3+](configs/deeplabv3plus)
+- [x] [UPerNet](configs/upernet)
+- [x] [NonLocal Net](configs/nonlocal_net)
+- [x] [CCNet](configs/ccnet)
+- [x] [DANet](configs/danet)
+- [x] [GCNet](configs/gcnet)
+- [x] [ANN](configs/ann)
+- [x] [OCRNet](configs/ocrnet)
+
+## Installation
+
+Please refer to [install.md](docs/install.md) for installation and dataset preparation.
+
+## Get Started
+
+Please see [getting_started.md](docs/getting_started.md) for the basic usage of MMSegmentation.
+There are also tutorials for [adding new datasets](docs/tutorials/new_dataset.md), [designing data pipelines](docs/tutorials/data_pipeline.md), and [adding new modules](docs/tutorials/new_modules.md).
+
+## Contributing
+
+We appreciate all contributions to improve MMSegmentation. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guideline.
+
+## Acknowledgement
+
+MMSegmentation is an open source project that welcomes any contribution and feedback.
+We hope that the toolbox and benchmark can serve the growing research
+community by providing a flexible and standardized toolkit for researchers to reimplement existing methods
+and develop their own new semantic segmentation methods.
+
+Many thanks to Ruobing Han ([@drcut](https://github.com/drcut)), Xiaoming Ma ([@aishangmaxiaoming](https://github.com/aishangmaxiaoming)), and Shiguang Wang ([@sunnyxiaohu](https://github.com/sunnyxiaohu)) for deployment support.
+
+## Citation
+
+If you use this toolbox or benchmark in your research, please cite this project.
+
+```
+@misc{mmseg2020,
+ author={Xu, Jiarui and Chen, Kai and Lin, Dahua},
+ title={{MMSegmentation}},
+ howpublished={\url{https://github.com/open-mmlab/mmsegmentation}},
+ year={2020}
+}
+```
+
+## Contact
+
+This repo is currently maintained by Jiarui Xu ([@xvjiarui](https://github.com/xvjiarui)) and Kai Chen ([@hellock](https://github.com/hellock)).
diff --git a/configs/_base_/datasets/ade20k.py b/configs/_base_/datasets/ade20k.py
new file mode 100644
index 0000000000..a1d9baba7c
--- /dev/null
+++ b/configs/_base_/datasets/ade20k.py
@@ -0,0 +1,54 @@
+# dataset settings
+dataset_type = 'ADE20KDataset'
+data_root = 'data/ade/ADEChallengeData2016'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 512)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', reduce_zero_label=True),
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 512),
+ # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=4,
+ workers_per_gpu=4,
+ train=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='images/training',
+ ann_dir='annotations/training',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='images/validation',
+ ann_dir='annotations/validation',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='images/validation',
+ ann_dir='annotations/validation',
+ pipeline=test_pipeline))
diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py
new file mode 100644
index 0000000000..21cf5c3958
--- /dev/null
+++ b/configs/_base_/datasets/cityscapes.py
@@ -0,0 +1,54 @@
+# dataset settings
+dataset_type = 'CityscapesDataset'
+data_root = 'data/cityscapes/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 1024)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations'),
+ dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 1024),
+ # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='leftImg8bit/train',
+ ann_dir='gtFine/train',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='leftImg8bit/val',
+ ann_dir='gtFine/val',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='leftImg8bit/val',
+ ann_dir='gtFine/val',
+ pipeline=test_pipeline))
diff --git a/configs/_base_/datasets/cityscapes_769x769.py b/configs/_base_/datasets/cityscapes_769x769.py
new file mode 100644
index 0000000000..a5bcff3710
--- /dev/null
+++ b/configs/_base_/datasets/cityscapes_769x769.py
@@ -0,0 +1,35 @@
+_base_ = './cityscapes.py'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (769, 769)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations'),
+ dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2049, 1025),
+ # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ train=dict(pipeline=train_pipeline),
+ val=dict(pipeline=test_pipeline),
+ test=dict(pipeline=test_pipeline))
diff --git a/configs/_base_/datasets/pascal_voc12.py b/configs/_base_/datasets/pascal_voc12.py
new file mode 100644
index 0000000000..6a367c7f1d
--- /dev/null
+++ b/configs/_base_/datasets/pascal_voc12.py
@@ -0,0 +1,57 @@
+# dataset settings
+dataset_type = 'PascalVOCDataset'
+data_root = 'data/VOCdevkit/VOC2012'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 512)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations'),
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 512),
+ # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=4,
+ workers_per_gpu=4,
+ train=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='JPEGImages',
+ ann_dir='SegmentationClass',
+ split='ImageSets/Segmentation/train.txt',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='JPEGImages',
+ ann_dir='SegmentationClass',
+ split='ImageSets/Segmentation/val.txt',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='JPEGImages',
+ ann_dir='SegmentationClass',
+ split='ImageSets/Segmentation/val.txt',
+ pipeline=test_pipeline))
diff --git a/configs/_base_/datasets/pascal_voc12_aug.py b/configs/_base_/datasets/pascal_voc12_aug.py
new file mode 100644
index 0000000000..3f23b6717d
--- /dev/null
+++ b/configs/_base_/datasets/pascal_voc12_aug.py
@@ -0,0 +1,9 @@
+_base_ = './pascal_voc12.py'
+# dataset settings
+data = dict(
+ train=dict(
+ ann_dir=['SegmentationClass', 'SegmentationClassAug'],
+ split=[
+ 'ImageSets/Segmentation/train.txt',
+ 'ImageSets/Segmentation/aug.txt'
+ ]))
diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py
new file mode 100644
index 0000000000..b564cc4e7e
--- /dev/null
+++ b/configs/_base_/default_runtime.py
@@ -0,0 +1,14 @@
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook', by_epoch=False),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+cudnn_benchmark = True
diff --git a/configs/_base_/models/ann_r50-d8.py b/configs/_base_/models/ann_r50-d8.py
new file mode 100644
index 0000000000..c2287b4790
--- /dev/null
+++ b/configs/_base_/models/ann_r50-d8.py
@@ -0,0 +1,46 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='ANNHead',
+ in_channels=[1024, 2048],
+ in_index=[2, 3],
+ channels=512,
+ project_channels=256,
+ query_scales=(1, ),
+ key_pool_scales=(1, 3, 6, 8),
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/ccnet_r50-d8.py b/configs/_base_/models/ccnet_r50-d8.py
new file mode 100644
index 0000000000..9f2794c33c
--- /dev/null
+++ b/configs/_base_/models/ccnet_r50-d8.py
@@ -0,0 +1,44 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='CCHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ recurrence=2,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/danet_r50-d8.py b/configs/_base_/models/danet_r50-d8.py
new file mode 100644
index 0000000000..76a27054ed
--- /dev/null
+++ b/configs/_base_/models/danet_r50-d8.py
@@ -0,0 +1,44 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='DAHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ pam_channels=64,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/deeplabv3_r50-d8.py b/configs/_base_/models/deeplabv3_r50-d8.py
new file mode 100644
index 0000000000..00c1f8796d
--- /dev/null
+++ b/configs/_base_/models/deeplabv3_r50-d8.py
@@ -0,0 +1,44 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='ASPPHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ dilations=(1, 12, 24, 36),
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/deeplabv3plus_r50-d8.py b/configs/_base_/models/deeplabv3plus_r50-d8.py
new file mode 100644
index 0000000000..f930b154f5
--- /dev/null
+++ b/configs/_base_/models/deeplabv3plus_r50-d8.py
@@ -0,0 +1,46 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='DepthwiseSeparableASPPHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ dilations=(1, 12, 24, 36),
+ c1_in_channels=256,
+ c1_channels=48,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/encnet_r50-d8.py b/configs/_base_/models/encnet_r50-d8.py
new file mode 100644
index 0000000000..46fffa1f8c
--- /dev/null
+++ b/configs/_base_/models/encnet_r50-d8.py
@@ -0,0 +1,48 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='EncHead',
+ in_channels=[512, 1024, 2048],
+ in_index=(1, 2, 3),
+ channels=512,
+ num_codes=32,
+ use_se_loss=True,
+ add_lateral=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_se_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/fcn_hr18.py b/configs/_base_/models/fcn_hr18.py
new file mode 100644
index 0000000000..e2583a2ac8
--- /dev/null
+++ b/configs/_base_/models/fcn_hr18.py
@@ -0,0 +1,52 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144)))),
+ decode_head=dict(
+ type='FCNHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ channels=sum([18, 36, 72, 144]),
+ input_transform='resize_concat',
+ kernel_size=1,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/fcn_r50-d8.py b/configs/_base_/models/fcn_r50-d8.py
new file mode 100644
index 0000000000..08546755c9
--- /dev/null
+++ b/configs/_base_/models/fcn_r50-d8.py
@@ -0,0 +1,45 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='FCNHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ num_convs=2,
+ concat_input=True,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/gcnet_r50-d8.py b/configs/_base_/models/gcnet_r50-d8.py
new file mode 100644
index 0000000000..9057687c06
--- /dev/null
+++ b/configs/_base_/models/gcnet_r50-d8.py
@@ -0,0 +1,46 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='GCHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ ratio=1 / 4.,
+ pooling_type='att',
+ fusion_types=('channel_add', ),
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/nonlocal_r50-d8.py b/configs/_base_/models/nonlocal_r50-d8.py
new file mode 100644
index 0000000000..7fa88f9a59
--- /dev/null
+++ b/configs/_base_/models/nonlocal_r50-d8.py
@@ -0,0 +1,46 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='NLHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ drop_out_ratio=0.1,
+ reduction=2,
+ use_scale=True,
+ mode='embedded_gaussian',
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/ocrnet_hr18.py b/configs/_base_/models/ocrnet_hr18.py
new file mode 100644
index 0000000000..4053daa0b0
--- /dev/null
+++ b/configs/_base_/models/ocrnet_hr18.py
@@ -0,0 +1,68 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='CascadeEncoderDecoder',
+ num_stages=2,
+ pretrained='open-mmlab://msra/hrnetv2_w18',
+ backbone=dict(
+ type='HRNet',
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144)))),
+ decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[18, 36, 72, 144],
+ channels=sum([18, 36, 72, 144]),
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ kernel_size=1,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ channels=512,
+ ocr_channels=256,
+ drop_out_ratio=-1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ ])
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/psanet_r50-d8.py b/configs/_base_/models/psanet_r50-d8.py
new file mode 100644
index 0000000000..170b48f457
--- /dev/null
+++ b/configs/_base_/models/psanet_r50-d8.py
@@ -0,0 +1,49 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='PSAHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ mask_size=(97, 97),
+ psa_type='bi-direction',
+ compact=False,
+ shrink_factor=2,
+ normalization_factor=1.0,
+ psa_softmax=True,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/pspnet_r50-d8.py b/configs/_base_/models/pspnet_r50-d8.py
new file mode 100644
index 0000000000..c5bb885c58
--- /dev/null
+++ b/configs/_base_/models/pspnet_r50-d8.py
@@ -0,0 +1,44 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='PSPHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ pool_scales=(1, 2, 3, 6),
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/models/upernet_r50.py b/configs/_base_/models/upernet_r50.py
new file mode 100644
index 0000000000..7d736f6bcf
--- /dev/null
+++ b/configs/_base_/models/upernet_r50.py
@@ -0,0 +1,44 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 1, 1),
+ strides=(1, 2, 2, 2),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='UPerHead',
+ in_channels=[256, 512, 1024, 2048],
+ in_index=[0, 1, 2, 3],
+ pool_scales=(1, 2, 3, 6),
+ channels=512,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=dict(
+ type='FCNHead',
+ in_channels=1024,
+ in_index=2,
+ channels=256,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/_base_/schedules/schedule_160k.py b/configs/_base_/schedules/schedule_160k.py
new file mode 100644
index 0000000000..8fe4b04d22
--- /dev/null
+++ b/configs/_base_/schedules/schedule_160k.py
@@ -0,0 +1,9 @@
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings
+total_iters = 160000
+checkpoint_config = dict(by_epoch=False, interval=16000)
+evaluation = dict(interval=16000, metric='mIoU')
diff --git a/configs/_base_/schedules/schedule_20k.py b/configs/_base_/schedules/schedule_20k.py
new file mode 100644
index 0000000000..d3903d6452
--- /dev/null
+++ b/configs/_base_/schedules/schedule_20k.py
@@ -0,0 +1,9 @@
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings
+total_iters = 20000
+checkpoint_config = dict(by_epoch=False, interval=2000)
+evaluation = dict(interval=2000, metric='mIoU')
diff --git a/configs/_base_/schedules/schedule_40k.py b/configs/_base_/schedules/schedule_40k.py
new file mode 100644
index 0000000000..b1449219cb
--- /dev/null
+++ b/configs/_base_/schedules/schedule_40k.py
@@ -0,0 +1,9 @@
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings
+total_iters = 40000
+checkpoint_config = dict(by_epoch=False, interval=4000)
+evaluation = dict(interval=4000, metric='mIoU')
diff --git a/configs/_base_/schedules/schedule_80k.py b/configs/_base_/schedules/schedule_80k.py
new file mode 100644
index 0000000000..3a77b41d45
--- /dev/null
+++ b/configs/_base_/schedules/schedule_80k.py
@@ -0,0 +1,9 @@
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings
+total_iters = 80000
+checkpoint_config = dict(by_epoch=False, interval=8000)
+evaluation = dict(interval=8000, metric='mIoU')
diff --git a/configs/ann/README.md b/configs/ann/README.md
new file mode 100644
index 0000000000..e3e217c4bd
--- /dev/null
+++ b/configs/ann/README.md
@@ -0,0 +1,46 @@
+# Asymmetric Non-local Neural Networks for Semantic Segmentation
+
+## Introduction
+```
+@inproceedings{annn,
+ author = {Zhen Zhu and
+ Mengde Xu and
+ Song Bai and
+ Tengteng Huang and
+ Xiang Bai},
+ title = {Asymmetric Non-local Neural Networks for Semantic Segmentation},
+ booktitle={International Conference on Computer Vision},
+ year = {2019},
+ url = {http://arxiv.org/abs/1908.07678},
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| ANN | R-50-D8 | 512x1024 | 40000 | 6 | 3.71 | 77.40 | 78.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211.log.json) |
+| ANN | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.55 | 76.55 | 78.85 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243-adf6eece.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243.log.json) |
+| ANN | R-50-D8 | 769x769 | 40000 | 6.8 | 1.70 | 78.89 | 80.46 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712-2b46b04d.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712.log.json) |
+| ANN | R-101-D8 | 769x769 | 40000 | 10.7 | 1.15 | 79.32 | 80.94 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720-059bff28.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720.log.json) |
+| ANN | R-50-D8 | 512x1024 | 80000 | - | - | 77.34 | 78.65 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911-5a9ad545.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911.log.json) |
+| ANN | R-101-D8 | 512x1024 | 80000 | - | - | 77.14 | 78.81 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728-aceccc6e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728.log.json) |
+| ANN | R-50-D8 | 769x769 | 80000 | - | - | 78.88 | 80.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426-cc7ff323.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426.log.json) |
+| ANN | R-101-D8 | 769x769 | 80000 | - | - | 78.80 | 80.34 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713-a9d4be8d.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| ANN | R-50-D8 | 512x512 | 80000 | 9.1 | 21.01 | 41.01 | 42.30 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818-26f75e11.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818.log.json) |
+| ANN | R-101-D8 | 512x512 | 80000 | 12.5 | 14.12 | 42.94 | 44.18 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818-c0153543.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818.log.json) |
+| ANN | R-50-D8 | 512x512 | 160000 | - | - | 41.74 | 42.62 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733-892247bc.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733.log.json) |
+| ANN | R-101-D8 | 512x512 | 160000 | - | - | 42.94 | 44.06 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733-955eb1ec.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| ANN | R-50-D8 | 512x512 | 20000 | 6 | 20.92 | 74.86 | 76.13 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246-dfcb1c62.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246.log.json) |
+| ANN | R-101-D8 | 512x512 | 20000 | 9.5 | 13.94 | 77.47 | 78.70 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246-2fad0042.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246.log.json) |
+| ANN | R-50-D8 | 512x512 | 40000 | - | - | 76.56 | 77.51 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314-b5dac322.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314.log.json) |
+| ANN | R-101-D8 | 512x512 | 40000 | - | - | 76.70 | 78.06 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314-bd205bbe.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314.log.json) |
diff --git a/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py b/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..d494e07333
--- /dev/null
+++ b/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './ann_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py b/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..1eeff0b030
--- /dev/null
+++ b/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './ann_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ann/ann_r101-d8_512x512_160k_ade20k.py b/configs/ann/ann_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..9e43af541f
--- /dev/null
+++ b/configs/ann/ann_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './ann_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py b/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..d854f2e422
--- /dev/null
+++ b/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './ann_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py b/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..893c53b1ca
--- /dev/null
+++ b/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './ann_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ann/ann_r101-d8_512x512_80k_ade20k.py b/configs/ann/ann_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..a64dac670e
--- /dev/null
+++ b/configs/ann/ann_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './ann_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py b/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..5950824849
--- /dev/null
+++ b/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './ann_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py b/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..a9c712d1cc
--- /dev/null
+++ b/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './ann_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py b/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..00b2594ba8
--- /dev/null
+++ b/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py b/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..ef7b369dd9
--- /dev/null
+++ b/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/ann/ann_r50-d8_512x512_160k_ade20k.py b/configs/ann/ann_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..ca6bb248ac
--- /dev/null
+++ b/configs/ann/ann_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py b/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..071f190261
--- /dev/null
+++ b/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py b/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..82a1c9386c
--- /dev/null
+++ b/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/ann/ann_r50-d8_512x512_80k_ade20k.py b/configs/ann/ann_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..5e04aa7c6a
--- /dev/null
+++ b/configs/ann/ann_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py b/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..393a400beb
--- /dev/null
+++ b/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/ann_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py b/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..7861a372e9
--- /dev/null
+++ b/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/ann_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/ccnet/README.md b/configs/ccnet/README.md
new file mode 100644
index 0000000000..436a962340
--- /dev/null
+++ b/configs/ccnet/README.md
@@ -0,0 +1,41 @@
+# CCNet: Criss-Cross Attention for Semantic Segmentation
+
+## Introduction
+```
+@inproceedings{huang2018ccnet,
+ title={CCNet: Criss-Cross Attention for Semantic Segmentation},
+ author={Huang, Zilong and Wang, Xinggang and Huang, Lichao and Huang, Chang and Wei, Yunchao and Liu, Wenyu},
+ booktitle={ICCV},
+ year={2019}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| CCNet | R-50-D8 | 512x1024 | 40000 | 6 | 3.32 | 77.76 | 78.87 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517-4123f401.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517.log.json) |
+| CCNet | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.31 | 76.35 | 78.19 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540-a3b84ba6.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540.log.json) |
+| CCNet | R-50-D8 | 769x769 | 40000 | 6.8 | 1.43 | 78.46 | 79.93 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125-76d11884.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125.log.json) |
+| CCNet | R-101-D8 | 769x769 | 40000 | 10.7 | 1.01 | 76.94 | 78.62 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428-4f57c8d0.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428.log.json) |
+| CCNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.03 | 80.16 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421-869a3423.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421.log.json) |
+| CCNet | R-101-D8 | 512x1024 | 80000 | - | - | 78.87 | 79.90 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935-ffae8917.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935.log.json) |
+| CCNet | R-50-D8 | 769x769 | 80000 | - | - | 79.29 | 81.08 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421-73eed8ca.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421.log.json) |
+| CCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.45 | 80.66 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502-ad3cd481.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| CCNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.89 | 41.78 | 42.98 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848-aa37f61e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848.log.json) |
+| CCNet | R-101-D8 | 512x512 | 80000 | 12.2 | 14.11 | 43.97 | 45.13 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848-1f4929a3.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848.log.json) |
+| CCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.08 | 43.13 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435-7c97193b.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435.log.json) |
+| CCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.71 | 45.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644-e849e007.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| CCNet | R-50-D8 | 512x512 | 20000 | 6 | 20.45 | 76.17 | 77.51 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212-fad81784.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212.log.json) |
+| CCNet | R-101-D8 | 512x512 | 20000 | 9.5 | 13.64 | 77.27 | 79.02 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212-0007b61d.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212.log.json) |
+| CCNet | R-50-D8 | 512x512 | 40000 | - | - | 75.96 | 77.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127-c2a15f02.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127.log.json) |
+| CCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.87 | 78.90 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127-c30da577.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127.log.json) |
diff --git a/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py b/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..d2bac38ca6
--- /dev/null
+++ b/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './ccnet_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py b/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..989928ab7f
--- /dev/null
+++ b/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './ccnet_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py b/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..c32bf48751
--- /dev/null
+++ b/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './ccnet_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py b/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..53eb77c0cd
--- /dev/null
+++ b/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './ccnet_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py b/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..d7eb668f39
--- /dev/null
+++ b/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './ccnet_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py b/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..029c1d525b
--- /dev/null
+++ b/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './ccnet_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py b/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..43f05fab05
--- /dev/null
+++ b/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './ccnet_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py b/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..654f377b6f
--- /dev/null
+++ b/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './ccnet_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..6a4316dde5
--- /dev/null
+++ b/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..16e34356e9
--- /dev/null
+++ b/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py b/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..1ad94d8988
--- /dev/null
+++ b/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py b/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..bbcd29ccea
--- /dev/null
+++ b/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/ccnet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py b/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..947b8ac8ce
--- /dev/null
+++ b/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/ccnet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py b/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..1a1f49cf6b
--- /dev/null
+++ b/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..d7fd8ccc59
--- /dev/null
+++ b/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/ccnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..6d3b3498bf
--- /dev/null
+++ b/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/ccnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/danet/README.md b/configs/danet/README.md
new file mode 100644
index 0000000000..5550890de4
--- /dev/null
+++ b/configs/danet/README.md
@@ -0,0 +1,41 @@
+# Dual Attention Network for Scene Segmentation
+
+## Introduction
+```
+@inproceedings{fu2018dual,
+ title={Dual Attention Network for Scene Segmentation},
+ author={Jun Fu and Jing Liu and Haijie Tian and Yong Li and Yongjun Bao and Zhiwei Fang and Hanqing Lu},
+ booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year={2019}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DANet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.66 | 78.74 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324-c0dbfa5f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324.log.json) |
+| DANet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.99 | 80.52 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831-c57a7157.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831.log.json) |
+| DANet | R-50-D8 | 769x769 | 40000 | 8.8 | 1.56 | 78.88 | 80.62 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703-76681c60.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703.log.json) |
+| DANet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.07 | 79.88 | 81.47 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717-dcb7fd4e.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717.log.json) |
+| DANet | R-50-D8 | 512x1024 | 80000 | - | - | 79.34 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029-2bfa2293.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029.log.json) |
+| DANet | R-101-D8 | 512x1024 | 80000 | - | - | 80.41 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918-955e6350.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918.log.json) |
+| DANet | R-50-D8 | 769x769 | 80000 | - | - | 79.27 | 80.96 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954-495689b4.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954.log.json) |
+| DANet | R-101-D8 | 769x769 | 80000 | - | - | 80.47 | 82.02 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918-f3a929e7.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DANet | R-50-D8 | 512x512 | 80000 | 11.5 | 21.20 | 41.66 | 42.90 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125.log.json) |
+| DANet | R-101-D8 | 512x512 | 80000 | 15 | 14.18 | 43.64 | 45.19 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126-d0357c73.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126.log.json) |
+| DANet | R-50-D8 | 512x512 | 160000 | - | - | 42.45 | 43.25 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340-9cb35dcd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340.log.json) |
+| DANet | R-101-D8 | 512x512 | 160000 | - | - | 44.17 | 45.02 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348-23bf12f9.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DANet | R-50-D8 | 512x512 | 20000 | 6.5 | 20.94 | 74.45 | 75.69 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026-9e9e3ab3.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026.log.json) |
+| DANet | R-101-D8 | 512x512 | 20000 | 9.9 | 13.76 | 76.02 | 77.23 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026-d48d23b2.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026.log.json) |
+| DANet | R-50-D8 | 512x512 | 40000 | - | - | 76.37 | 77.29 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526-426e3a64.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526.log.json) |
+| DANet | R-101-D8 | 512x512 | 40000 | - | - | 76.51 | 77.32 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031-788e232a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031.log.json) |
diff --git a/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py b/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..3bfb9bdb30
--- /dev/null
+++ b/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py b/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..d80b2ec160
--- /dev/null
+++ b/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_512x512_160k_ade20k.py b/configs/danet/danet_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..0f22d0fb63
--- /dev/null
+++ b/configs/danet/danet_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py b/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..709f93cba3
--- /dev/null
+++ b/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py b/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..5c623eb568
--- /dev/null
+++ b/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_512x512_80k_ade20k.py b/configs/danet/danet_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..bd31bc8f28
--- /dev/null
+++ b/configs/danet/danet_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py b/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..597d76de79
--- /dev/null
+++ b/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py b/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..70f9b31966
--- /dev/null
+++ b/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py b/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..1b70c5b8d4
--- /dev/null
+++ b/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py b/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..03734310d7
--- /dev/null
+++ b/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/danet/danet_r50-d8_512x512_160k_ade20k.py b/configs/danet/danet_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..22aaf857c3
--- /dev/null
+++ b/configs/danet/danet_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py b/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..010f86f1aa
--- /dev/null
+++ b/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/danet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py b/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..0cef0f09bf
--- /dev/null
+++ b/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/danet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/danet/danet_r50-d8_512x512_80k_ade20k.py b/configs/danet/danet_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..154e84890e
--- /dev/null
+++ b/configs/danet/danet_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py b/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..b8fba930a8
--- /dev/null
+++ b/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/danet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py b/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..8b8915d856
--- /dev/null
+++ b/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/danet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/deeplabv3/README.md b/configs/deeplabv3/README.md
new file mode 100644
index 0000000000..37e2ee6baa
--- /dev/null
+++ b/configs/deeplabv3/README.md
@@ -0,0 +1,43 @@
+# Rethinking atrous convolution for semantic image segmentation
+
+## Introduction
+```
+@article{chen2017rethinking,
+ title={Rethinking atrous convolution for semantic image segmentation},
+ author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig},
+ journal={arXiv preprint arXiv:1706.05587},
+ year={2017}
+}
+```
+
+## Results and models
+
+Note: `D-8` here corresponds to the output stride 8 setting of the DeepLab series.
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DeepLabV3 | R-50-D8 | 512x1024 | 40000 | 6.1 | 2.57 | 79.09 | 80.45 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json) |
+| DeepLabV3 | R-101-D8 | 512x1024 | 40000 | 9.6 | 1.92 | 77.12 | 79.61 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json) |
+| DeepLabV3 | R-50-D8 | 769x769 | 40000 | 6.9 | 1.11 | 78.58 | 79.89 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json) |
+| DeepLabV3 | R-101-D8 | 769x769 | 40000 | 10.9 | 0.83 | 79.27 | 80.11 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json) |
+| DeepLabV3 | R-50-D8 | 512x1024 | 80000 | - | - | 79.32 | 80.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json) |
+| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | - | - | 80.20 | 81.21 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json) |
+| DeepLabV3 | R-50-D8 | 769x769 | 80000 | - | - | 79.89 | 81.06 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json) |
+| DeepLabV3 | R-101-D8 | 769x769 | 80000 | - | - | 79.67 | 80.81 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 8.9 | 14.76 | 42.42 | 43.28 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) |
+| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 12.4 | 10.14 | 44.08 | 45.19 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json) |
+| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | 42.66 | 44.09 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json) |
+| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | 45.00 | 46.66 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 6.1 | 13.88 | 76.17 | 77.42 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json) |
+| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | 78.70 | 79.95 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) |
+| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 77.68 | 78.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) |
+| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 77.92 | 79.18 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) |
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..8c707c79d6
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..6804a57813
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py b/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..df6f36ef7c
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py b/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..40f5f62373
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py b/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..fb2be22f8b
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py b/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..796ba3fb14
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..e6d58a67b3
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..13094a98ee
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..8e7420d24a
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..132787db98
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..742e17d749
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..f62da1a809
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/deeplabv3_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..492bd3dfdc
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/deeplabv3_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..5ddef212f7
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..fb067d2117
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/deeplabv3_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..8b8692140b
--- /dev/null
+++ b/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/deeplabv3_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/deeplabv3plus/README.md b/configs/deeplabv3plus/README.md
new file mode 100644
index 0000000000..591554daea
--- /dev/null
+++ b/configs/deeplabv3plus/README.md
@@ -0,0 +1,43 @@
+# Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation
+
+## Introduction
+```
+@inproceedings{deeplabv3plus2018,
+ title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
+ author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam},
+ booktitle={ECCV},
+ year={2018}
+}
+```
+
+## Results and models
+
+Note: `D-8` here corresponds to the output stride 8 setting for the DeepLab series.
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DeepLabV3+ | R-50-D8 | 512x1024 | 40000 | 7.5 | 3.94 | 79.61 | 81.01 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x1024 | 40000 | 11 | 2.60 | 80.21 | 81.82 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json) |
+| DeepLabV3+ | R-50-D8 | 769x769 | 40000 | 8.5 | 1.72 | 78.97 | 80.46 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json) |
+| DeepLabV3+ | R-101-D8 | 769x769 | 40000 | 12.5 | 1.15 | 79.46 | 80.50 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json) |
+| DeepLabV3+ | R-50-D8 | 512x1024 | 80000 | - | - | 80.09 | 81.13 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | - | - | 80.97 | 82.03 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json) |
+| DeepLabV3+ | R-50-D8 | 769x769 | 80000 | - | - | 79.83 | 81.48 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json) |
+| DeepLabV3+ | R-101-D8 | 769x769 | 80000 | - | - | 80.98 | 82.18 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405-a7573d20.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 10.6 | 21.01 | 42.72 | 43.75 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 14.1 | 14.16 | 44.60 | 46.06 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json) |
+| DeepLabV3+ | R-50-D8 | 512x512 | 160000 | - | - | 43.95 | 44.93 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x512 | 160000 | - | - | 45.47 | 46.35 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DeepLabV3+ | R-50-D8 | 512x512 | 20000 | 7.6 | 21 | 75.93 | 77.50 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | 77.22 | 78.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) |
+| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | 76.81 | 77.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | 78.62 | 79.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) |
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..d6ce85aea5
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..0ebbd3c70e
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..a75c9d3019
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..ebb1a8eaee
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..3caa6cf8ae
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..53fd3a9095
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..c3c92eb26f
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..5ea9cdb5b6
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..7243d0390f
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,5 @@
+_base_ = [
+ '../_base_/models/deeplabv3plus_r50-d8.py',
+ '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..3304d3677f
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,5 @@
+_base_ = [
+ '../_base_/models/deeplabv3plus_r50-d8.py',
+ '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..e734880956
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..1056ad4d1e
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/deeplabv3plus_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..e36c83ba60
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/deeplabv3plus_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..8705972631
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..4fcc062ca8
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/deeplabv3plus_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..e0bfa94576
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/deeplabv3plus_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/encnet/README.md b/configs/encnet/README.md
new file mode 100644
index 0000000000..9f1edde82a
--- /dev/null
+++ b/configs/encnet/README.md
@@ -0,0 +1,34 @@
+# Context Encoding for Semantic Segmentation
+
+## Introduction
+```
+@InProceedings{Zhang_2018_CVPR,
+author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
+title = {Context Encoding for Semantic Segmentation},
+booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+month = {June},
+year = {2018}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| encnet | R-50-D8 | 512x1024 | 40000 | 8.6 | 4.58 | 75.67 | 77.08 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json) |
+| encnet | R-101-D8 | 512x1024 | 40000 | 12.1 | 2.66 | 75.81 | 77.21 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json) |
+| encnet | R-50-D8 | 769x769 | 40000 | 9.8 | 1.82 | 76.24 | 77.85 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json) |
+| encnet | R-101-D8 | 769x769 | 40000 | 13.7 | 1.26 | 74.25 | 76.25 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json) |
+| encnet | R-50-D8 | 512x1024 | 80000 | - | - | 77.94 | 79.13 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json) |
+| encnet | R-101-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.47 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json) |
+| encnet | R-50-D8 | 769x769 | 80000 | - | - | 77.44 | 78.72 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json) |
+| encnet | R-101-D8 | 769x769 | 80000 | - | - | 76.10 | 76.97 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| encnet | R-50-D8 | 512x512 | 80000 | 10.1 | 22.81 | 39.53 | 41.17 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json) |
+| encnet | R-101-D8 | 512x512 | 80000 | 13.6 | 14.87 | 42.11 | 43.61 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json) |
+| encnet | R-50-D8 | 512x512 | 160000 | - | - | 40.10 | 41.71 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json) |
+| encnet | R-101-D8 | 512x512 | 160000 | - | - | 42.61 | 44.01 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json) |
diff --git a/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py b/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..f34373d9eb
--- /dev/null
+++ b/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './encnet_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py b/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..0b0207b314
--- /dev/null
+++ b/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './encnet_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py b/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..8fec6ba255
--- /dev/null
+++ b/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './encnet_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py b/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..c264af998b
--- /dev/null
+++ b/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './encnet_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py b/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..8a6968ea58
--- /dev/null
+++ b/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './encnet_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py b/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..94151004ea
--- /dev/null
+++ b/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './encnet_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py b/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..d6ade67b76
--- /dev/null
+++ b/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './encnet_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py b/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..55648c08b2
--- /dev/null
+++ b/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './encnet_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py b/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..4ea6ed0e84
--- /dev/null
+++ b/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py b/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..d2feeef7e9
--- /dev/null
+++ b/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py b/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..2a5dc203cc
--- /dev/null
+++ b/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py b/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..9cb7952ced
--- /dev/null
+++ b/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py b/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..81f3cbfbf5
--- /dev/null
+++ b/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py b/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..835375cb04
--- /dev/null
+++ b/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py b/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..9f44b425d4
--- /dev/null
+++ b/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py b/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..aac7f2d443
--- /dev/null
+++ b/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py b/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..600b701a71
--- /dev/null
+++ b/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py
@@ -0,0 +1,8 @@
+_base_ = [
+ '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ backbone=dict(stem_channels=128),
+ decode_head=dict(num_classes=150),
+ auxiliary_head=dict(num_classes=150))
diff --git a/configs/fcn/README.md b/configs/fcn/README.md
new file mode 100644
index 0000000000..6ec2080123
--- /dev/null
+++ b/configs/fcn/README.md
@@ -0,0 +1,45 @@
+# Fully Convolutional Networks for Semantic Segmentation
+
+## Introduction
+```
+@article{shelhamer2017fully,
+ title={Fully convolutional networks for semantic segmentation},
+ author={Shelhamer, Evan and Long, Jonathan and Darrell, Trevor},
+ journal={IEEE transactions on pattern analysis and machine intelligence},
+ volume={39},
+ number={4},
+ pages={640--651},
+ year={2017},
+ publisher={IEEE}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download (model) | download (log) |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------|----------------|
+| FCN | R-50-D8 | 512x1024 | 40000 | 5.7 | 4.17 | 72.25 | 73.36 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json) |
+| FCN | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.66 | 75.45 | 76.58 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json) |
+| FCN | R-50-D8 | 769x769 | 40000 | 6.5 | 1.80 | 71.47 | 72.54 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json) |
+| FCN | R-101-D8 | 769x769 | 40000 | 10.4 | 1.19 | 73.93 | 75.14 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json) |
+| FCN | R-50-D8 | 512x1024 | 80000 | - | - | 73.61 | 74.24 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json) |
+| FCN | R-101-D8 | 512x1024 | 80000 | - | - | 75.13 | 75.94 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json) |
+| FCN | R-50-D8 | 769x769 | 80000 | - | - | 72.64 | 73.32 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json) |
+| FCN | R-101-D8 | 769x769 | 80000 | - | - | 75.52 | 76.61 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download (model) | download (log) |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------|----------------|
+| FCN | R-50-D8 | 512x512 | 80000 | 8.5 | 23.49 | 35.94 | 37.94 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json) |
+| FCN | R-101-D8 | 512x512 | 80000 | 12 | 14.78 | 39.61 | 40.83 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json) |
+| FCN | R-50-D8 | 512x512 | 160000 | - | - | 36.10 | 38.08 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json) |
+| FCN | R-101-D8 | 512x512 | 160000 | - | - | 39.91 | 41.40 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download (model) | download (log) |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------|----------------|
+| FCN | R-50-D8 | 512x512 | 20000 | 5.7 | 23.28 | 67.08 | 69.94 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) |
+| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | 71.16 | 73.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) |
+| FCN | R-50-D8 | 512x512 | 40000 | - | - | 66.97 | 69.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
+| FCN | R-101-D8 | 512x512 | 40000 | - | - | 69.91 | 72.38 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) |
diff --git a/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py b/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..7918dd10d0
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..528110dc73
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py b/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..1bf6780f2c
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py b/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..09a5fe5468
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py b/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..eafefaa675
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py b/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..6d0294530f
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py b/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..6b4cc57129
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..3503c76935
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py b/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..401c6ea733
--- /dev/null
+++ b/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..990a085eda
--- /dev/null
+++ b/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py b/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..db272d6b5b
--- /dev/null
+++ b/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py b/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..17206a5171
--- /dev/null
+++ b/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py b/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..8cec429c3e
--- /dev/null
+++ b/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py b/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..64997c26f7
--- /dev/null
+++ b/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py b/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..9a91f9cc96
--- /dev/null
+++ b/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/fcn_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..bbde29e8e9
--- /dev/null
+++ b/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/fcn_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/gcnet/README.md b/configs/gcnet/README.md
new file mode 100644
index 0000000000..44c4a40511
--- /dev/null
+++ b/configs/gcnet/README.md
@@ -0,0 +1,42 @@
+# GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond
+
+## Introduction
+```
+@inproceedings{cao2019gcnet,
+ title={Gcnet: Non-local networks meet squeeze-excitation networks and beyond},
+ author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision Workshops},
+ pages={0--0},
+ year={2019}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download (model) | download (log) |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------|----------------|
+| GCNet | R-50-D8 | 512x1024 | 40000 | 5.8 | 3.93 | 77.69 | 78.56 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json) |
+| GCNet | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.61 | 78.28 | 79.34 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json) |
+| GCNet | R-50-D8 | 769x769 | 40000 | 6.5 | 1.67 | 78.12 | 80.09 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json) |
+| GCNet | R-101-D8 | 769x769 | 40000 | 10.5 | 1.13 | 78.95 | 80.71 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json) |
+| GCNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.48 | 80.01 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json) |
+| GCNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.03 | 79.84 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json) |
+| GCNet | R-50-D8 | 769x769 | 80000 | - | - | 78.68 | 80.66 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json) |
+| GCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.18 | 80.71 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download (model) | download (log) |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------|----------------|
+| GCNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.38 | 41.47 | 42.85 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json) |
+| GCNet | R-101-D8 | 512x512 | 80000 | 12 | 15.20 | 42.82 | 44.54 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json) |
+| GCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.37 | 43.52 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json) |
+| GCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.69 | 45.21 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download (model) | download (log) |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------|----------------|
+| GCNet | R-50-D8 | 512x512 | 20000 | 5.8 | 23.35 | 76.42 | 77.51 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json) |
+| GCNet | R-101-D8 | 512x512 | 20000 | 9.2 | 14.80 | 77.41 | 78.56 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json) |
+| GCNet | R-50-D8 | 512x512 | 40000 | - | - | 76.24 | 77.63 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json) |
+| GCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.84 | 78.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json) |
diff --git a/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py b/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..27bd9422da
--- /dev/null
+++ b/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './gcnet_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py b/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..7f0f83fe39
--- /dev/null
+++ b/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './gcnet_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py b/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..9888120f65
--- /dev/null
+++ b/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './gcnet_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py b/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..1b70ca8e46
--- /dev/null
+++ b/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './gcnet_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py b/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..b17c7a12b5
--- /dev/null
+++ b/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './gcnet_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py b/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..a2183fc2db
--- /dev/null
+++ b/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './gcnet_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py b/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..08a6031f20
--- /dev/null
+++ b/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './gcnet_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py b/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..5efb61339c
--- /dev/null
+++ b/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './gcnet_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..610467c072
--- /dev/null
+++ b/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..155e28f421
--- /dev/null
+++ b/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py b/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..1549a4d5bf
--- /dev/null
+++ b/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py b/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..a496204bdb
--- /dev/null
+++ b/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/gcnet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py b/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..d85cf6550f
--- /dev/null
+++ b/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/gcnet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py b/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..89d5e1ae0f
--- /dev/null
+++ b/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..ac9826ad92
--- /dev/null
+++ b/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/gcnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..cacf24e4f3
--- /dev/null
+++ b/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/gcnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md
new file mode 100644
index 0000000000..4bb016e441
--- /dev/null
+++ b/configs/hrnet/README.md
@@ -0,0 +1,46 @@
+# Deep High-Resolution Representation Learning for Human Pose Estimation
+
+## Introduction
+```
+@inproceedings{SunXLW19,
+ title={Deep High-Resolution Representation Learning for Human Pose Estimation},
+ author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang},
+ booktitle={CVPR},
+ year={2019}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| FCN | HRNetV2p-W18-Small | 512x1024 | 40000 | 1.7 | 23.74 | 73.86 | 75.91 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json) |
+| FCN | HRNetV2p-W18 | 512x1024 | 40000 | 2.9 | 12.97 | 77.19 | 78.92 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json) |
+| FCN | HRNetV2p-W48 | 512x1024 | 40000 | 6.2 | 6.42 | 78.48 | 79.69 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json) |
+| FCN | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 75.31 | 77.48 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json) |
+| FCN | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.65 | 80.35 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json) |
+| FCN | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 79.93 | 80.72 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json) |
+| FCN | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 76.31 | 78.31 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json) |
+| FCN | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 78.80 | 80.74 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json) |
+| FCN | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 80.65 | 81.92 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 3.8 | 38.66 | 31.38 | 32.45 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json) |
+| FCN | HRNetV2p-W18 | 512x512 | 80000 | 4.9 | 22.57 | 35.51 | 36.80 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20200614_185145-66f20cb7.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20200614_185145.log.json) |
+| FCN | HRNetV2p-W48 | 512x512 | 80000 | 8.2 | 21.23 | 41.90 | 43.27 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json) |
+| FCN | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 33.00 | 34.55 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20200614_214413-870f65ac.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20200614_214413.log.json) |
+| FCN | HRNetV2p-W18 | 512x512 | 160000 | - | - | 36.79 | 38.58 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json) |
+| FCN | HRNetV2p-W48 | 512x512 | 160000 | - | - | 42.02 | 43.86 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| FCN | HRNetV2p-W18-Small | 512x512 | 20000 | 1.8 | 43.36 | 65.20 | 68.55 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20200617_224503-56e36088.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20200617_224503.log.json) |
+| FCN | HRNetV2p-W18 | 512x512 | 20000 | 2.9 | 23.48 | 72.30 | 74.71 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json) |
+| FCN | HRNetV2p-W48 | 512x512 | 20000 | 6.2 | 22.05 | 75.87 | 78.58 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json) |
+| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 66.61 | 70.00 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) |
+| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | 72.90 | 75.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) |
+| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | 76.24 | 78.49 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) |
diff --git a/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py
new file mode 100644
index 0000000000..9f04e935c3
--- /dev/null
+++ b/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
diff --git a/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..99760c36d8
--- /dev/null
+++ b/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..a653dda192
--- /dev/null
+++ b/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py b/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..45ed99b681
--- /dev/null
+++ b/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py
@@ -0,0 +1,5 @@
+_base_ = [
+ '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(decode_head=dict(num_classes=150))
diff --git a/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py b/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..f06448b168
--- /dev/null
+++ b/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py
@@ -0,0 +1,5 @@
+_base_ = [
+ '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py'
+]
+model = dict(decode_head=dict(num_classes=21))
diff --git a/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py b/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..d74e95943a
--- /dev/null
+++ b/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py
@@ -0,0 +1,5 @@
+_base_ = [
+ '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
+model = dict(decode_head=dict(num_classes=21))
diff --git a/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py b/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..52bc9f5e91
--- /dev/null
+++ b/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py
@@ -0,0 +1,5 @@
+_base_ = [
+ '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(decode_head=dict(num_classes=150))
diff --git a/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py b/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py
new file mode 100644
index 0000000000..ddbe3801f9
--- /dev/null
+++ b/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_512x1024_160k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py b/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..4e31d26e09
--- /dev/null
+++ b/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_512x1024_40k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py b/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..ee2831d99d
--- /dev/null
+++ b/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_512x1024_80k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py b/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..22a3ce0b38
--- /dev/null
+++ b/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_512x512_160k_ade20k.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py b/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..d0de5df752
--- /dev/null
+++ b/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_512x512_20k_voc12aug.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py b/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..409db3c628
--- /dev/null
+++ b/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_512x512_40k_voc12aug.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py b/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..a8400979b1
--- /dev/null
+++ b/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_512x512_80k_ade20k.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py b/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py
new file mode 100644
index 0000000000..394a61c99f
--- /dev/null
+++ b/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_512x1024_160k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=dict(
+ in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py b/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..d37ab1d09e
--- /dev/null
+++ b/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_512x1024_40k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=dict(
+ in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py b/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..a9bab32b52
--- /dev/null
+++ b/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_512x1024_80k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=dict(
+ in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py b/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..dff4fea85c
--- /dev/null
+++ b/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_512x512_160k_ade20k.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=dict(
+ in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py b/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..a8d1deb986
--- /dev/null
+++ b/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_512x512_20k_voc12aug.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=dict(
+ in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py b/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..1084a57e97
--- /dev/null
+++ b/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_512x512_40k_voc12aug.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=dict(
+ in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py b/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..7eca7fa4b8
--- /dev/null
+++ b/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_512x512_80k_ade20k.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=dict(
+ in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/configs/nonlocal_net/README.md b/configs/nonlocal_net/README.md
new file mode 100644
index 0000000000..dbd924dfe8
--- /dev/null
+++ b/configs/nonlocal_net/README.md
@@ -0,0 +1,42 @@
+# Non-local Neural Networks
+
+## Introduction
+```
+@inproceedings{wang2018non,
+ title={Non-local neural networks},
+ author={Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={7794--7803},
+ year={2018}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|----------|----------|-----------|--------:|----------|----------------|------:|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| NonLocal | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.72 | 78.24 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json) |
+| NonLocal | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.95 | 78.66 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json) |
+| NonLocal | R-50-D8 | 769x769 | 40000 | 8.9 | 1.52 | 78.33 | 79.92 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json) |
+| NonLocal | R-101-D8 | 769x769 | 40000 | 12.8 | 1.05 | 78.57 | 80.29 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json) |
+| NonLocal | R-50-D8 | 512x1024 | 80000 | - | - | 78.01 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json) |
+| NonLocal | R-101-D8 | 512x1024 | 80000 | - | - | 78.93 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json) |
+| NonLocal | R-50-D8 | 769x769 | 80000 | - | - | 79.05 | 80.68 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json) |
+| NonLocal | R-101-D8 | 769x769 | 80000 | - | - | 79.40 | 80.85 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|----------|----------|-----------|--------:|----------|----------------|------:|--------------:|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| NonLocal | R-50-D8 | 512x512 | 80000 | 9.1 | 21.37 | 40.75 | 42.05 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json) |
+| NonLocal | R-101-D8 | 512x512 | 80000 | 12.6 | 13.97 | 42.90 | 44.27 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json) |
+| NonLocal | R-50-D8 | 512x512 | 160000 | - | - | 42.03 | 43.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json) |
+| NonLocal | R-101-D8 | 512x512 | 160000 | - | - | 43.36 | 44.83 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20200616_003422-affd0f8d.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20200616_003422.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|----------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| NonLocal | R-50-D8 | 512x512 | 20000 | 6.4 | 21.21 | 76.20 | 77.12 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json) |
+| NonLocal | R-101-D8 | 512x512 | 20000 | 9.8 | 14.01 | 78.15 | 78.86 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json) |
+| NonLocal | R-50-D8 | 512x512 | 40000 | - | - | 76.65 | 77.47 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json) |
+| NonLocal | R-101-D8 | 512x512 | 40000 | - | - | 78.27 | 79.12 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json) |
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..ef7b06dd38
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..7a1e66cf1c
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py b/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..df9c2aca9c
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py b/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..490f9873a2
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py b/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..40d9190fba
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py b/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..0c6f60dac7
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..23e6da7f23
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..0627e2b5a7
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..9d4dc73903
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..b0672b687a
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..b1adfbab88
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..2e808d8072
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/nonlocal_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..66b443abec
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/nonlocal_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..8a7a2f509b
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+ '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..3f0d47238f
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/nonlocal_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..5d448c730a
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/nonlocal_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/ocrnet/README.md b/configs/ocrnet/README.md
new file mode 100644
index 0000000000..fe9e05aaac
--- /dev/null
+++ b/configs/ocrnet/README.md
@@ -0,0 +1,46 @@
+# Object-Contextual Representations for Semantic Segmentation
+
+## Introduction
+```
+@article{yuan2019ocr,
+ title={Object-Contextual Representations for Semantic Segmentation},
+ author={Yuan, Yuhui and Chen, Xilin and Wang, Jingdong},
+ journal={arXiv preprint arXiv:1909.11065},
+ year={2019}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 74.30 | 75.95 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) |
+| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) |
+| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| OCRNet | HRNetV2p-W18-Small | 512x512 | 80000 | 6.7 | 28.98 | 35.06 | 35.80 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x512 | 80000 | 7.9 | 18.93 | 37.79 | 39.16 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x512 | 80000 | 11.2 | 16.99 | 43.00 | 44.30 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json) |
+| OCRNet | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 37.19 | 38.40 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x512 | 160000 | - | - | 39.32 | 40.80 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x512 | 160000 | - | - | 43.25 | 44.88 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| OCRNet | HRNetV2p-W18-Small | 512x512 | 20000 | 3.5 | 31.55 | 71.70 | 73.84 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x512 | 20000 | 4.7 | 19.91 | 74.75 | 77.11 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x512 | 20000 | 8.1 | 17.83 | 77.72 | 79.87 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json) |
+| OCRNet | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 72.76 | 74.60 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x512 | 40000 | - | - | 74.98 | 77.40 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x512 | 40000 | - | - | 77.14 | 79.71 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json) |
diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py
new file mode 100644
index 0000000000..1c86eba17c
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..2c73b3839c
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..506ad9319a
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py b/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..fe5d20ffb0
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py
@@ -0,0 +1,35 @@
+_base_ = [
+ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[18, 36, 72, 144],
+ channels=sum([18, 36, 72, 144]),
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ kernel_size=1,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=150,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ channels=512,
+ ocr_channels=256,
+ drop_out_ratio=-1,
+ num_classes=150,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+])
diff --git a/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py b/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..71e70dcec1
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py
@@ -0,0 +1,36 @@
+_base_ = [
+ '../_base_/models/ocrnet_hr18.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[18, 36, 72, 144],
+ channels=sum([18, 36, 72, 144]),
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ kernel_size=1,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=21,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ channels=512,
+ ocr_channels=256,
+ drop_out_ratio=-1,
+ num_classes=21,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+])
diff --git a/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py b/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..b3fd747211
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py
@@ -0,0 +1,36 @@
+_base_ = [
+ '../_base_/models/ocrnet_hr18.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[18, 36, 72, 144],
+ channels=sum([18, 36, 72, 144]),
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ kernel_size=1,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=21,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ channels=512,
+ ocr_channels=256,
+ drop_out_ratio=-1,
+ num_classes=21,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+])
diff --git a/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py b/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..e41eaf8ac5
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py
@@ -0,0 +1,35 @@
+_base_ = [
+ '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[18, 36, 72, 144],
+ channels=sum([18, 36, 72, 144]),
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ kernel_size=1,
+ num_convs=1,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=150,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[18, 36, 72, 144],
+ in_index=(0, 1, 2, 3),
+ input_transform='resize_concat',
+ channels=512,
+ ocr_channels=256,
+ drop_out_ratio=-1,
+ num_classes=150,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+])
diff --git a/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py b/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py
new file mode 100644
index 0000000000..fc7909785f
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = './ocrnet_hr18_512x1024_160k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py b/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..923731f74f
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = './ocrnet_hr18_512x1024_40k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py b/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..be6bf16a2f
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = './ocrnet_hr18_512x1024_80k_cityscapes.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py b/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..81f3d5cb91
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py
@@ -0,0 +1,9 @@
+_base_ = './ocrnet_hr18_512x512_160k_ade20k.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py b/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..ceb944815b
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py
@@ -0,0 +1,9 @@
+_base_ = './ocrnet_hr18_512x512_20k_voc12aug.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py b/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..70babc91c9
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py
@@ -0,0 +1,9 @@
+_base_ = './ocrnet_hr18_512x512_40k_voc12aug.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py b/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..36e77219ac
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py
@@ -0,0 +1,9 @@
+_base_ = './ocrnet_hr18_512x512_80k_ade20k.py'
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w18_small',
+ backbone=dict(
+ extra=dict(
+ stage1=dict(num_blocks=(2, )),
+ stage2=dict(num_blocks=(2, 2)),
+ stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+ stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py b/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py
new file mode 100644
index 0000000000..70c1ce5b5b
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py
@@ -0,0 +1,39 @@
+_base_ = './ocrnet_hr18_512x1024_160k_cityscapes.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[48, 96, 192, 384],
+ channels=sum([48, 96, 192, 384]),
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ kernel_size=1,
+ num_convs=1,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=19,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[48, 96, 192, 384],
+ channels=512,
+ ocr_channels=256,
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ norm_cfg=norm_cfg,
+ drop_out_ratio=-1,
+ num_classes=19,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+ ])
diff --git a/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py b/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..cd777e89bf
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py
@@ -0,0 +1,39 @@
+_base_ = './ocrnet_hr18_512x1024_40k_cityscapes.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[48, 96, 192, 384],
+ channels=sum([48, 96, 192, 384]),
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ kernel_size=1,
+ num_convs=1,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=19,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[48, 96, 192, 384],
+ channels=512,
+ ocr_channels=256,
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ norm_cfg=norm_cfg,
+ drop_out_ratio=-1,
+ num_classes=19,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+ ])
diff --git a/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py b/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..6ed60096a1
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py
@@ -0,0 +1,39 @@
+_base_ = './ocrnet_hr18_512x1024_80k_cityscapes.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[48, 96, 192, 384],
+ channels=sum([48, 96, 192, 384]),
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ kernel_size=1,
+ num_convs=1,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=19,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[48, 96, 192, 384],
+ channels=512,
+ ocr_channels=256,
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ norm_cfg=norm_cfg,
+ drop_out_ratio=-1,
+ num_classes=19,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+ ])
diff --git a/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py b/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..f6cd20e642
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py
@@ -0,0 +1,39 @@
+_base_ = './ocrnet_hr18_512x512_160k_ade20k.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[48, 96, 192, 384],
+ channels=sum([48, 96, 192, 384]),
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ kernel_size=1,
+ num_convs=1,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=150,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[48, 96, 192, 384],
+ channels=512,
+ ocr_channels=256,
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ norm_cfg=norm_cfg,
+ drop_out_ratio=-1,
+ num_classes=150,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+ ])
diff --git a/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py b/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..3149cfc371
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py
@@ -0,0 +1,39 @@
+_base_ = './ocrnet_hr18_512x512_20k_voc12aug.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[48, 96, 192, 384],
+ channels=sum([48, 96, 192, 384]),
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ kernel_size=1,
+ num_convs=1,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=21,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[48, 96, 192, 384],
+ channels=512,
+ ocr_channels=256,
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ norm_cfg=norm_cfg,
+ drop_out_ratio=-1,
+ num_classes=21,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+ ])
diff --git a/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py b/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..f97260039b
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py
@@ -0,0 +1,39 @@
+_base_ = './ocrnet_hr18_512x512_40k_voc12aug.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[48, 96, 192, 384],
+ channels=sum([48, 96, 192, 384]),
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ kernel_size=1,
+ num_convs=1,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=21,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[48, 96, 192, 384],
+ channels=512,
+ ocr_channels=256,
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ norm_cfg=norm_cfg,
+ drop_out_ratio=-1,
+ num_classes=21,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+ ])
diff --git a/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py b/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..94dbe90298
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py
@@ -0,0 +1,39 @@
+_base_ = './ocrnet_hr18_512x512_80k_ade20k.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w48',
+ backbone=dict(
+ extra=dict(
+ stage2=dict(num_channels=(48, 96)),
+ stage3=dict(num_channels=(48, 96, 192)),
+ stage4=dict(num_channels=(48, 96, 192, 384)))),
+ decode_head=[
+ dict(
+ type='FCNHead',
+ in_channels=[48, 96, 192, 384],
+ channels=sum([48, 96, 192, 384]),
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ kernel_size=1,
+ num_convs=1,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ drop_out_ratio=-1,
+ num_classes=150,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='OCRHead',
+ in_channels=[48, 96, 192, 384],
+ channels=512,
+ ocr_channels=256,
+ input_transform='resize_concat',
+ in_index=(0, 1, 2, 3),
+ norm_cfg=norm_cfg,
+ drop_out_ratio=-1,
+ num_classes=150,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+ ])
diff --git a/configs/psanet/README.md b/configs/psanet/README.md
new file mode 100644
index 0000000000..d6d94e36d2
--- /dev/null
+++ b/configs/psanet/README.md
@@ -0,0 +1,42 @@
+# PSANet: Point-wise Spatial Attention Network for Scene Parsing
+
+## Introduction
+```
+@inproceedings{zhao2018psanet,
+ title={PSANet: Point-wise spatial attention network for scene parsing},
+ author={Zhao, Hengshuang and Zhang, Yi and Liu, Shu and Shi, Jianping and Change Loy, Chen and Lin, Dahua and Jia, Jiaya},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ pages={267--283},
+ year={2018}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| PSANet | R-50-D8 | 512x1024 | 40000 | 7 | 3.17 | 77.63 | 79.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json) |
+| PSANet | R-101-D8 | 512x1024 | 40000 | 10.5 | 2.20 | 79.14 | 80.19 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json) |
+| PSANet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.40 | 77.99 | 79.64 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json) |
+| PSANet | R-101-D8 | 769x769 | 40000 | 11.9 | 0.98 | 78.43 | 80.26 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json) |
+| PSANet | R-50-D8 | 512x1024 | 80000 | - | - | 77.24 | 78.69 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json) |
+| PSANet | R-101-D8 | 512x1024 | 80000 | - | - | 79.31 | 80.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json) |
+| PSANet | R-50-D8 | 769x769 | 80000 | - | - | 79.31 | 80.91 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json) |
+| PSANet | R-101-D8 | 769x769 | 80000 | - | - | 79.69 | 80.89 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| PSANet | R-50-D8 | 512x512 | 80000 | 9 | 18.91 | 41.14 | 41.91 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json) |
+| PSANet | R-101-D8 | 512x512 | 80000 | 12.5 | 13.13 | 43.80 | 44.75 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json) |
+| PSANet | R-50-D8 | 512x512 | 160000 | - | - | 41.67 | 42.95 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json) |
+| PSANet | R-101-D8 | 512x512 | 160000 | - | - | 43.74 | 45.38 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| PSANet | R-50-D8 | 512x512 | 20000 | 6.9 | 18.24 | 76.39 | 77.34 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json) |
+| PSANet | R-101-D8 | 512x512 | 20000 | 10.4 | 12.63 | 77.91 | 79.30 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json) |
+| PSANet | R-50-D8 | 512x512 | 40000 | - | - | 76.30 | 77.35 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json) |
+| PSANet | R-101-D8 | 512x512 | 40000 | - | - | 77.73 | 79.05 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json) |
diff --git a/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py b/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..69d212f158
--- /dev/null
+++ b/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './psanet_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py b/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..bc25d6aaf6
--- /dev/null
+++ b/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './psanet_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py b/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..7f6795e5ef
--- /dev/null
+++ b/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './psanet_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py b/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..1a3c43495b
--- /dev/null
+++ b/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './psanet_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py b/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..f62eef9773
--- /dev/null
+++ b/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './psanet_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py b/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..f8865a7c4d
--- /dev/null
+++ b/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './psanet_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py b/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..ffc99f0109
--- /dev/null
+++ b/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './psanet_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py b/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..6a9efc55ad
--- /dev/null
+++ b/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './psanet_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py b/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..6671fcb4bf
--- /dev/null
+++ b/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py b/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..a441013a4c
--- /dev/null
+++ b/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py b/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..d177d17e17
--- /dev/null
+++ b/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,8 @@
+_base_ = [
+ '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(mask_size=(66, 66), num_classes=150),
+ auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py b/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..af06cb66cc
--- /dev/null
+++ b/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/psanet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py b/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..803c42da35
--- /dev/null
+++ b/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/psanet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py b/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..58a18a043a
--- /dev/null
+++ b/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,8 @@
+_base_ = [
+ '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(mask_size=(66, 66), num_classes=150),
+ auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py b/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..2068667b0f
--- /dev/null
+++ b/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/psanet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py b/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..8745f5dbad
--- /dev/null
+++ b/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/psanet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/pspnet/README.md b/configs/pspnet/README.md
new file mode 100644
index 0000000000..ec31feeb8a
--- /dev/null
+++ b/configs/pspnet/README.md
@@ -0,0 +1,41 @@
+# Pyramid Scene Parsing Network
+
+## Introduction
+```
+@inproceedings{zhao2017pspnet,
+ title={Pyramid Scene Parsing Network},
+ author={Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya},
+ booktitle={CVPR},
+ year={2017}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| PSPNet | R-50-D8 | 512x1024 | 40000 | 6.1 | 4.07 | 77.85 | 79.18 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) |
+| PSPNet | R-101-D8 | 512x1024 | 40000 | 9.6 | 2.68 | 78.34 | 79.74 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) |
+| PSPNet | R-50-D8 | 769x769 | 40000 | 6.9 | 1.76 | 78.26 | 79.88 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json) |
+| PSPNet | R-101-D8 | 769x769 | 40000 | 10.9 | 1.15 | 79.08 | 80.28 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json) |
+| PSPNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.79 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json) |
+| PSPNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.76 | 81.01 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json) |
+| PSPNet | R-50-D8 | 769x769 | 80000 | - | - | 79.59 | 80.69 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json) |
+| PSPNet | R-101-D8 | 769x769 | 80000 | - | - | 79.77 | 81.06 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| PSPNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.53 | 41.13 | 41.94 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json) |
+| PSPNet | R-101-D8 | 512x512 | 80000 | 12 | 15.30 | 43.57 | 44.35 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json) |
+| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | 42.48 | 43.44 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json) |
+| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | 44.39 | 45.35 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| PSPNet | R-50-D8 | 512x512 | 20000 | 6.1 | 23.59 | 76.78 | 77.61 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) |
+| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | 78.47 | 79.25 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) |
+| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 77.29 | 78.48 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
+| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 78.52 | 79.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
diff --git a/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..38fee11bc2
--- /dev/null
+++ b/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..9931a07bc2
--- /dev/null
+++ b/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py b/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..6107b41544
--- /dev/null
+++ b/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py b/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..2221b202d6
--- /dev/null
+++ b/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py b/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..15f578b600
--- /dev/null
+++ b/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py b/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..fb7c3d55d5
--- /dev/null
+++ b/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..c6e7e58508
--- /dev/null
+++ b/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..59b8c6dd5e
--- /dev/null
+++ b/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..5deb5872b0
--- /dev/null
+++ b/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..4e9972849d
--- /dev/null
+++ b/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py b/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..c34b66aaf8
--- /dev/null
+++ b/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py b/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..cd88154d5e
--- /dev/null
+++ b/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/pspnet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py b/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..f0c20c12f6
--- /dev/null
+++ b/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/pspnet_r50-d8.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py b/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..6922cc6d1f
--- /dev/null
+++ b/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..e1026e0065
--- /dev/null
+++ b/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/pspnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..c1215c5c4a
--- /dev/null
+++ b/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/pspnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/upernet/README.md b/configs/upernet/README.md
new file mode 100644
index 0000000000..88a64d848d
--- /dev/null
+++ b/configs/upernet/README.md
@@ -0,0 +1,42 @@
+# Unified Perceptual Parsing for Scene Understanding
+
+## Introduction
+```
+@inproceedings{xiao2018unified,
+ title={Unified perceptual parsing for scene understanding},
+ author={Xiao, Tete and Liu, Yingcheng and Zhou, Bolei and Jiang, Yuning and Sun, Jian},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ pages={418--434},
+ year={2018}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|---------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| UPerNet | R-50 | 512x1024 | 40000 | 6.4 | 4.25 | 77.10 | 78.37 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json) |
+| UPerNet | R-101 | 512x1024 | 40000 | 7.4 | 3.79 | 78.69 | 80.11 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json) |
+| UPerNet | R-50 | 769x769 | 40000 | 7.2 | 1.76 | 77.98 | 79.70 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json) |
+| UPerNet | R-101 | 769x769 | 40000 | 8.4 | 1.56 | 79.03 | 80.77 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json) |
+| UPerNet | R-50 | 512x1024 | 80000 | - | - | 78.19 | 79.19 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json) |
+| UPerNet | R-101 | 512x1024 | 80000 | - | - | 79.40 | 80.46 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json) |
+| UPerNet | R-50 | 769x769 | 80000 | - | - | 79.39 | 80.92 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json) |
+| UPerNet | R-101 | 769x769 | 80000 | - | - | 80.10 | 81.49 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|---------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| UPerNet | R-50 | 512x512 | 80000 | 8.1 | 23.40 | 40.70 | 41.81 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json) |
+| UPerNet | R-101 | 512x512 | 80000 | 9.1 | 20.34 | 42.91 | 43.96 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json) |
+| UPerNet | R-50 | 512x512 | 160000 | - | - | 42.05 | 42.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json) |
+| UPerNet | R-101 | 512x512 | 160000 | - | - | 43.82 | 44.85 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json) |
+
+### Pascal VOC 2012 + Aug
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|---------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| UPerNet | R-50 | 512x512 | 20000 | 6.4 | 23.17 | 74.82 | 76.35 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json) |
+| UPerNet | R-101 | 512x512 | 20000 | 7.5 | 19.98 | 77.10 | 78.29 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json) |
+| UPerNet | R-50 | 512x512 | 40000 | - | - | 75.92 | 77.44 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json) |
+| UPerNet | R-101 | 512x512 | 40000 | - | - | 77.43 | 78.56 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json) |
diff --git a/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py b/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..b90b597d83
--- /dev/null
+++ b/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './upernet_r50_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py b/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..420ca2e428
--- /dev/null
+++ b/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './upernet_r50_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/upernet/upernet_r101_512x512_160k_ade20k.py b/configs/upernet/upernet_r101_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..146f13eb79
--- /dev/null
+++ b/configs/upernet/upernet_r101_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './upernet_r50_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/upernet/upernet_r101_512x512_20k_voc12aug.py b/configs/upernet/upernet_r101_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..56345d1806
--- /dev/null
+++ b/configs/upernet/upernet_r101_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './upernet_r50_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/upernet/upernet_r101_512x512_40k_voc12aug.py b/configs/upernet/upernet_r101_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..0669b741b9
--- /dev/null
+++ b/configs/upernet/upernet_r101_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './upernet_r50_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/upernet/upernet_r101_512x512_80k_ade20k.py b/configs/upernet/upernet_r101_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..abfb9c5d9f
--- /dev/null
+++ b/configs/upernet/upernet_r101_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './upernet_r50_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/upernet/upernet_r101_769x769_40k_cityscapes.py b/configs/upernet/upernet_r101_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..e5f3a3fae1
--- /dev/null
+++ b/configs/upernet/upernet_r101_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './upernet_r50_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/upernet/upernet_r101_769x769_80k_cityscapes.py b/configs/upernet/upernet_r101_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..a709165657
--- /dev/null
+++ b/configs/upernet/upernet_r101_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './upernet_r50_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py b/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000..d621e89ce6
--- /dev/null
+++ b/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py b/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..95fffcc76c
--- /dev/null
+++ b/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+ '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/upernet/upernet_r50_512x512_160k_ade20k.py b/configs/upernet/upernet_r50_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..f259165fca
--- /dev/null
+++ b/configs/upernet/upernet_r50_512x512_160k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/upernet/upernet_r50_512x512_20k_voc12aug.py b/configs/upernet/upernet_r50_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000..95f5c09567
--- /dev/null
+++ b/configs/upernet/upernet_r50_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/upernet_r50.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/upernet/upernet_r50_512x512_40k_voc12aug.py b/configs/upernet/upernet_r50_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000..9621fd1f5c
--- /dev/null
+++ b/configs/upernet/upernet_r50_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/upernet_r50.py',
+ '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/upernet/upernet_r50_512x512_80k_ade20k.py b/configs/upernet/upernet_r50_512x512_80k_ade20k.py
new file mode 100644
index 0000000000..ce5d71f56d
--- /dev/null
+++ b/configs/upernet/upernet_r50_512x512_80k_ade20k.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
+test_cfg = dict(mode='whole')
diff --git a/configs/upernet/upernet_r50_769x769_40k_cityscapes.py b/configs/upernet/upernet_r50_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000..590ab61b76
--- /dev/null
+++ b/configs/upernet/upernet_r50_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/upernet_r50.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/configs/upernet/upernet_r50_769x769_80k_cityscapes.py b/configs/upernet/upernet_r50_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000..b3a6107581
--- /dev/null
+++ b/configs/upernet/upernet_r50_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+ '../_base_/models/upernet_r50.py',
+ '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+ decode_head=dict(align_corners=True),
+ auxiliary_head=dict(align_corners=True))
+test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
diff --git a/demo/demo.png b/demo/demo.png
new file mode 100644
index 0000000000..1e82d7a077
Binary files /dev/null and b/demo/demo.png differ
diff --git a/demo/image_demo.py b/demo/image_demo.py
new file mode 100644
index 0000000000..183f23871b
--- /dev/null
+++ b/demo/image_demo.py
@@ -0,0 +1,29 @@
+from argparse import ArgumentParser
+
+from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot
+from mmseg.core.evaluation import get_palette
+
+
+def main():  # CLI demo: run segmentation on one image and show the result
+ parser = ArgumentParser()
+ parser.add_argument('img', help='Image file')
+ parser.add_argument('config', help='Config file')
+ parser.add_argument('checkpoint', help='Checkpoint file')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--palette',
+ default='cityscapes',
+ help='Color palette used for segmentation map')
+ args = parser.parse_args()
+
+ # build the model from the config and checkpoint on the requested device
+ model = init_segmentor(args.config, args.checkpoint, device=args.device)
+ # run inference on the single image given on the command line
+ result = inference_segmentor(model, args.img)
+ # show the result using the palette looked up by name via get_palette
+ show_result_pyplot(model, args.img, result, get_palette(args.palette))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/inference_demo.ipynb b/demo/inference_demo.ipynb
new file mode 100644
index 0000000000..e47d964e3c
--- /dev/null
+++ b/demo/inference_demo.ipynb
@@ -0,0 +1,150 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "mkdir: cannot create directory ‘../checkpoints’: File exists\n",
+ "--2020-07-07 08:54:25-- https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth\n",
+ "Resolving open-mmlab.s3.ap-northeast-2.amazonaws.com (open-mmlab.s3.ap-northeast-2.amazonaws.com)... 52.219.58.55\n",
+ "Connecting to open-mmlab.s3.ap-northeast-2.amazonaws.com (open-mmlab.s3.ap-northeast-2.amazonaws.com)|52.219.58.55|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 196205945 (187M) [application/x-www-form-urlencoded]\n",
+ "Saving to: ‘../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth.1’\n",
+ "\n",
+ "pspnet_r50-d8_512x1 100%[===================>] 187.12M 16.5MB/s in 13s \n",
+ "\n",
+ "2020-07-07 08:54:38 (14.8 MB/s) - ‘../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth.1’ saved [196205945/196205945]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "!mkdir ../checkpoints\n",
+ "!wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P ../checkpoints"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "pycharm": {
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot\n",
+ "from mmseg.core.evaluation import get_palette"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "pycharm": {
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "config_file = '../configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'\n",
+ "checkpoint_file = '../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# build the model from a config file and a checkpoint file\n",
+ "model = init_segmentor(config_file, checkpoint_file, device='cuda:0')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test a single image\n",
+ "img = 'demo.png'\n",
+ "result = inference_segmentor(model, img)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/mnt/v-liubin/code/mmsegmentation/mmseg/models/segmentors/base.py:265: UserWarning: show==False and out_file is not specified, only result image will be returned\n",
+ " warnings.warn('show==False and out_file is not specified, only '\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# show the results\n",
+ "show_result_pyplot(model, img, result, get_palette('cityscapes'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "open-mmlab",
+ "language": "python",
+ "name": "open-mmlab"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.7"
+ },
+ "pycharm": {
+ "stem_cell": {
+ "cell_type": "raw",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": []
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000000..700ac15dee
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,20 @@
+# Versions of the base image components; override with --build-arg.
+ARG PYTORCH="1.3"
+ARG CUDA="10.1"
+ARG CUDNN="7"
+
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+
+ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
+ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+# NOTE(review): ENV does not run a shell, so the $(...) below is presumably
+# stored literally instead of being expanded -- confirm whether this variable
+# is needed and, if so, set the conda prefix explicitly.
+ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
+
+# System libraries; the apt lists are removed afterwards to keep the layer small.
+RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install mmsegmentation
+RUN conda clean --all
+RUN git clone https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation
+WORKDIR /mmsegmentation
+ENV FORCE_CUDA="1"
+RUN pip install --no-cache-dir -e .
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000000..d4bb2cbb9e
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# "help" and "Makefile" are declared phony so that the catch-all pattern rule
+# below is never used to try to build them.
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/api.rst b/docs/api.rst
new file mode 100644
index 0000000000..9c14a67564
--- /dev/null
+++ b/docs/api.rst
@@ -0,0 +1,61 @@
+API Reference
+==============
+
+mmseg.apis
+--------------
+.. automodule:: mmseg.apis
+ :members:
+
+mmseg.core
+--------------
+
+seg
+^^^^^^^^^^
+.. automodule:: mmseg.core.seg
+ :members:
+
+evaluation
+^^^^^^^^^^
+.. automodule:: mmseg.core.evaluation
+ :members:
+
+utils
+^^^^^^^^^^
+.. automodule:: mmseg.core.utils
+ :members:
+
+mmseg.datasets
+--------------
+
+datasets
+^^^^^^^^^^
+.. automodule:: mmseg.datasets
+ :members:
+
+pipelines
+^^^^^^^^^^
+.. automodule:: mmseg.datasets.pipelines
+ :members:
+
+mmseg.models
+--------------
+
+segmentors
+^^^^^^^^^^
+.. automodule:: mmseg.models.segmentors
+ :members:
+
+backbones
+^^^^^^^^^^
+.. automodule:: mmseg.models.backbones
+ :members:
+
+decode_heads
+^^^^^^^^^^^^
+.. automodule:: mmseg.models.decode_heads
+ :members:
+
+losses
+^^^^^^^^^^
+.. automodule:: mmseg.models.losses
+ :members:
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000000..20f2534dec
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,72 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath('..'))
+
+# -- Project information -----------------------------------------------------
+
+project = 'MMSegmentation'
+copyright = '2020-2020, OpenMMLab'
+author = 'MMSegmentation Authors'
+
+# The full version, including alpha/beta/rc tags
+# It is read from the package's VERSION file; the path is relative to the
+# current working directory.
+with open('../mmseg/VERSION', 'r') as f:
+ release = f.read().strip()
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.napoleon',
+ 'sphinx.ext.viewcode',
+ 'recommonmark',
+ 'sphinx_markdown_tables',
+]
+
+# Modules that autodoc mocks instead of importing while building the docs.
+autodoc_mock_imports = ['matplotlib', 'pycocotools', 'mmseg.version']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# Here a mapping from file suffix to file type is used instead.
+source_suffix = {
+ '.rst': 'restructuredtext',
+ '.md': 'markdown',
+}
+
+# The master toctree document.
+master_doc = 'index'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
diff --git a/docs/config.md b/docs/config.md
new file mode 100644
index 0000000000..e07fdfee84
--- /dev/null
+++ b/docs/config.md
@@ -0,0 +1,365 @@
+# Config System
+We incorporate modular and inheritance design into our config system, which is convenient to conduct various experiments.
+If you wish to inspect the config file, you may run `python tools/print_config.py /PATH/TO/CONFIG` to see the complete config.
+You may also pass `--options xxx.yyy=zzz` to see updated config.
+
+## Config File Structure
+
+There are 4 basic component types under `configs/_base_`: dataset, model, schedule, and default_runtime.
+Many methods could be easily constructed with one of each like DeepLabV3, PSPNet.
+The configs that are composed by components from `_base_` are called _primitive_.
+
+For all configs under the same folder, it is recommended to have only **one** _primitive_ config. All other configs should inherit from the _primitive_ config. In this way, the maximum inheritance level is 3.
+
+For easy understanding, we recommend contributors to inherit from existing methods.
+For example, if some modification is made based on DeepLabV3, users may first inherit the basic DeepLabV3 structure by specifying `_base_ = ../deeplabv3/deeplabv3_r50_512x1024_40ki_cityscapes.py`, then modify the necessary fields in the config files.
+
+If you are building an entirely new method that does not share the structure with any of the existing methods, you may create a folder `xxxnet` under `configs`.
+
+Please refer to [mmcv](https://mmcv.readthedocs.io/en/latest/utils.html#config) for detailed documentation.
+
+## Config Name Style
+
+We follow the below style to name config files. Contributors are advised to follow the same style.
+
+```
+{model}_{backbone}_[misc]_[gpu x batch_per_gpu]_{resolution}_{schedule}_{dataset}
+```
+
+`{xxx}` is required field and `[yyy]` is optional.
+
+- `{model}`: model type like `psp`, `deeplabv3`, etc.
+- `{backbone}`: backbone type like `r50` (ResNet-50), `x101` (ResNeXt-101).
+- `[misc]`: miscellaneous setting/plugins of model, e.g. `dconv`, `gcb`, `attention`, `mstrain`.
+- `[gpu x batch_per_gpu]`: GPUs and samples per GPU, `8x2` is used by default.
+- `{resolution}`: crop size used for training, e.g. `512x1024`.
+- `{schedule}`: training schedule, `20ki` means 20k iterations.
+- `{dataset}`: dataset like `cityscapes`, `voc12aug`, `ade`.
+
+## An Example of PSPNet
+
+To help the users have a basic idea of a complete config and the modules in a modern semantic segmentation system,
+we make brief comments on the config of PSPNet using ResNet50V1c as the following.
+For more detailed usage and the corresponding alternatives for each module, please refer to the API documentation.
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True) # Segmentation usually uses SyncBN
+model = dict(
+ type='EncoderDecoder', # Name of segmentor
+ pretrained='open-mmlab://resnet50_v1c', # The ImageNet pretrained backbone to be loaded
+ backbone=dict(
+ type='ResNetV1c', # The type of backbone. Please refer to mmseg/models/backbones/resnet.py for details.
+ depth=50, # Depth of backbone. Normally 50, 101 are used.
+ num_stages=4, # Number of stages of backbone.
+ out_indices=(0, 1, 2, 3), # The index of output feature maps produced in each stages.
+ dilations=(1, 1, 2, 4), # The dilation rate of each layer.
+ strides=(1, 2, 1, 1), # The stride of each layer.
+ norm_cfg=dict( # The configuration of norm layer.
+ type='SyncBN', # Type of norm layer. Usually it is SyncBN.
+ requires_grad=True), # Whether to train the gamma and beta in norm
+ norm_eval=False, # Whether to freeze the statistics in BN
+ style='pytorch', # The style of backbone, 'pytorch' means that stride 2 layers are in 3x3 conv, 'caffe' means stride 2 layers are in 1x1 convs.
+ contract_dilation=True), # When dilation > 1, whether contract first layer of dilation.
+ decode_head=dict(
+ type='PSPHead', # Type of decode head. Please refer to mmseg/models/decode_heads for available options.
+ in_channels=2048, # Input channel of decode head.
+ in_index=3, # The index of feature map to select.
+ channels=512, # The intermediate channels of decode head.
+ pool_scales=(1, 2, 3, 6), # The avg pooling scales of PSPHead. Please refer to paper for details.
+ drop_out_ratio=0.1, # The dropout ratio before final classification layer.
+ num_classes=19, # Number of segmentation classes. Usually 19 for cityscapes, 21 for VOC, 150 for ADE20k.
+ norm_cfg=dict(type='SyncBN', requires_grad=True), # The configuration of norm layer.
+ align_corners=False, # The align_corners argument for resize in decoding.
+ loss_decode=dict( # Config of loss function for the decode_head.
+ type='CrossEntropyLoss', # Type of loss used for segmentation.
+ use_sigmoid=False, # Whether use sigmoid activation for segmentation.
+ loss_weight=1.0)), # Loss weight of decode head.
+ auxiliary_head=dict(
+ type='FCNHead', # Type of auxiliary head. Please refer to mmseg/models/decode_heads for available options.
+ in_channels=1024, # Input channel of auxiliary head.
+ in_index=2, # The index of feature map to select.
+ channels=256, # The intermediate channels of decode head.
+ num_convs=1, # Number of convs in FCNHead. It is usually 1 in auxiliary head.
+ concat_input=False, # Whether concat output of convs with input before classification layer.
+ drop_out_ratio=0.1, # The dropout ratio before final classification layer.
+ num_classes=19, # Number of segmentation classes. Usually 19 for cityscapes, 21 for VOC, 150 for ADE20k.
+ norm_cfg=dict(type='SyncBN', requires_grad=True), # The configuration of norm layer.
+ align_corners=False, # The align_corners argument for resize in decoding.
+ loss_decode=dict( # Config of loss function for the decode_head.
+ type='CrossEntropyLoss', # Type of loss used for segmentation.
+ use_sigmoid=False, # Whether use sigmoid activation for segmentation.
+ loss_weight=0.4))) # Loss weight of auxiliary head, which is usually 0.4 of decode head.
+train_cfg = dict() # train_cfg is just a place holder for now.
+test_cfg = dict(mode='whole') # The test mode, options are 'whole' and 'sliding'. 'whole': whole image fully-convolutional test. 'sliding': sliding crop window on the image.
+dataset_type = 'CityscapesDataset' # Dataset type, this will be used to define the dataset.
+data_root = 'data/cityscapes/' # Root path of data.
+img_norm_cfg = dict( # Image normalization config to normalize the input images.
+ mean=[123.675, 116.28, 103.53], # Mean values used to pre-train the pre-trained backbone models.
+ std=[58.395, 57.12, 57.375], # Standard deviation used to pre-train the pre-trained backbone models.
+ to_rgb=True) # The channel order of images used to pre-train the pre-trained backbone models.
+crop_size = (512, 1024) # The crop size during training.
+train_pipeline = [ # Training pipeline.
+ dict(type='LoadImageFromFile'), # First pipeline to load images from file path.
+ dict(type='LoadAnnotations'), # Second pipeline to load annotations for current image.
+ dict(type='Resize', # Augmentation pipeline that resize the images and their annotations.
+ img_scale=(2048, 1024), # The largest scale of image.
+ ratio_range=(0.5, 2.0)), # The augmented scale range as ratio.
+ dict(type='RandomCrop', # Augmentation pipeline that randomly crop a patch from current image.
+ crop_size=(512, 1024), # The crop size of patch.
+ cat_max_ratio=0.75), # The max area ratio that could be occupied by single category.
+ dict(
+ type='RandomFlip', # Augmentation pipeline that flip the images and their annotations
+ flip_ratio=0.5), # The ratio or probability to flip
+ dict(type='PhotoMetricDistortion'), # Augmentation pipeline that distort current image with several photo metric methods.
+ dict(
+ type='Normalize', # Augmentation pipeline that normalize the input images
+ mean=[123.675, 116.28, 103.53], # These keys are the same of img_norm_cfg since the
+ std=[58.395, 57.12, 57.375], # keys of img_norm_cfg are used here as arguments
+ to_rgb=True),
+ dict(type='Pad', # Augmentation pipeline that pad the image to specified size.
+ size=(512, 1024), # The output size of padding.
+ pad_val=0, # The padding value for image.
+ seg_pad_val=255), # The padding value of 'gt_semantic_seg'.
+ dict(type='DefaultFormatBundle'), # Default format bundle to gather data in the pipeline
+ dict(type='Collect', # Pipeline that decides which keys in the data should be passed to the segmentor
+ keys=['img', 'gt_semantic_seg'])
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'), # First pipeline to load images from file path
+ dict(
+ type='MultiScaleFlipAug', # An encapsulation that encapsulates the test time augmentations
+ img_scale=(2048, 1024), # Decides the largest scale for testing, used for the Resize pipeline
+ flip=False, # Whether to flip images during testing
+ transforms=[
+ dict(type='Resize', # Use resize augmentation
+ keep_ratio=True), # Whether to keep the ratio between height and width, the img_scale set here will be suppressed by the img_scale set above.
+ dict(type='RandomFlip'), # Though RandomFlip is added in pipeline, it is not used when flip=False
+ dict(
+ type='Normalize', # Normalization config, the values are from img_norm_cfg
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='ImageToTensor', # Convert image to tensor
+ keys=['img']),
+ dict(type='Collect', # Collect pipeline that collect necessary keys for testing.
+ keys=['img'])
+ ])
+]
+data = dict(
+ samples_per_gpu=2, # Batch size of a single GPU
+ workers_per_gpu=2, # Worker to pre-fetch data for each single GPU
+ train=dict( # Train dataset config
+ type='CityscapesDataset', # Type of dataset, refer to mmseg/datasets/ for details.
+ data_root='data/cityscapes/', # The root of dataset.
+ img_dir='leftImg8bit/train', # The image directory of dataset.
+ ann_dir='gtFine/train', # The annotation directory of dataset.
+ pipeline=[ # pipeline, this is passed by the train_pipeline created before.
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations'),
+ dict(
+ type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+ ]),
+ val=dict( # Validation dataset config
+ type='CityscapesDataset',
+ data_root='data/cityscapes/',
+ img_dir='leftImg8bit/val',
+ ann_dir='gtFine/val',
+ pipeline=[ # Pipeline is passed by test_pipeline created before
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 1024),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]),
+ test=dict(
+ type='CityscapesDataset',
+ data_root='data/cityscapes/',
+ img_dir='leftImg8bit/val',
+ ann_dir='gtFine/val',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 1024),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]))
+log_config = dict( # config to register logger hook
+ interval=50, # Interval to print the log
+ hooks=[
+ # dict(type='TensorboardLoggerHook') # The Tensorboard logger is also supported
+ dict(type='TextLoggerHook', by_epoch=False)
+ ])
+dist_params = dict(backend='nccl') # Parameters to setup distributed training, the port can also be set.
+log_level = 'INFO' # The level of logging.
+load_from = None # load models as a pre-trained model from a given path. This will not resume training.
+resume_from = None # Resume checkpoints from a given path, the training will be resumed from the iteration when the checkpoint was saved.
+workflow = [('train', 1)] # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once. The workflow trains the model for 40000 iterations according to `total_iters`.
+cudnn_benchmark = True # Whether use cudnn_benchmark to speed up, which is fast for fixed input size.
+optimizer = dict( # Config used to build optimizer, support all the optimizers in PyTorch whose arguments are also the same as those in PyTorch
+ type='SGD', # Type of optimizers, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details
+ lr=0.01, # Learning rate of optimizers, see detail usages of the parameters in the documentation of PyTorch
+ momentum=0.9, # Momentum
+ weight_decay=0.0005) # Weight decay of SGD
+optimizer_config = dict() # Config used to build the optimizer hook, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/optimizer.py#L8 for implementation details.
+lr_config = dict(
+ policy='poly', # The policy of scheduler, also support Step, CosineAnnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9.
+ power=0.9, # The power of polynomial decay.
+ min_lr=0.0001, # The minimum learning rate to stabilize the training.
+ by_epoch=False) # Whether count by epoch or not.
+total_iters = 40000 # Total number of iterations.
+checkpoint_config = dict( # Config to set the checkpoint hook, Refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation.
+ by_epoch=False, # Whether count by epoch or not.
+ interval=4000) # The save interval.
+evaluation = dict( # The config to build the evaluation hook. Please refer to mmseg/core/evaluation/eval_hook.py for details.
+ interval=4000, # The interval of evaluation.
+ metric='mIoU') # The evaluation metric.
+
+
+```
+
+## FAQ
+
+### Ignore some fields in the base configs
+
+Sometimes, you may set `_delete_=True` to ignore some of fields in base configs.
+You may refer to [mmcv](https://mmcv.readthedocs.io/en/latest/utils.html#inherit-from-base-config-with-ignored-fields) for a simple illustration.
+
+For example, in MMSegmentation, suppose we want to change the backbone of PSPNet, which has the following config.
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='open-mmlab://resnet50_v1c',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(...),
+ auxiliary_head=dict(...))
+```
+
+`ResNet` and `HRNet` use different keywords to construct.
+
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40ki_cityscapes.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ pretrained='open-mmlab://msra/hrnetv2_w32',
+ backbone=dict(
+ _delete_=True,
+ type='HRNet',
+ norm_cfg=norm_cfg,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256)))),
+ decode_head=dict(...),
+ auxiliary_head=dict(...))
+```
+
+The `_delete_=True` would replace all old keys in `backbone` field with new keys.
+
+### Use intermediate variables in configs
+
+Some intermediate variables are used in the configs files, like `train_pipeline`/`test_pipeline` in datasets.
+It's worth noting that when modifying intermediate variables in the children configs, users need to pass the intermediate variables into the corresponding fields again.
+For example, we would like to change the multi-scale strategy to train/test a PSPNet. `train_pipeline`/`test_pipeline` are the intermediate variables we would like to modify.
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40ki_cityscapes.py'
+crop_size = (512, 1024)
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations'),
+ dict(type='Resize', img_scale=(2048, 1024), ratio_range=(1.0, 2.0)), # change to [1., 2.]
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 1024),
+ img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], # change to multi scale testing
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ train=dict(pipeline=train_pipeline),
+ val=dict(pipeline=test_pipeline),
+ test=dict(pipeline=test_pipeline))
+```
+We first define the new `train_pipeline`/`test_pipeline` and pass them into `data`.
diff --git a/docs/getting_started.md b/docs/getting_started.md
new file mode 100644
index 0000000000..a5ad9b888d
--- /dev/null
+++ b/docs/getting_started.md
@@ -0,0 +1,332 @@
+# Getting Started
+
+This page provides basic tutorials about the usage of MMSegmentation.
+For installation instructions, please see [install.md](install.md).
+
+## Prepare datasets
+
+It is recommended to symlink the dataset root to `$MMSEGMENTATION/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+```
+mmsegmentation
+├── mmseg
+├── tools
+├── configs
+├── data
+│ ├── cityscapes
+│ │ ├── leftImg8bit
+│ │ │ ├── train
+│ │ │ ├── val
+│ │ ├── gtFine
+│ │ │ ├── train
+│ │ │ ├── val
+│ ├── VOCdevkit
+│ │ ├── VOC2012
+│ │ │ ├── JPEGImages
+│ │ │ ├── SegmentationClass
+│ │ │ ├── ImageSets
+│ │ │ │ ├── Segmentation
+│ │ ├── VOCaug
+│ │ │ ├── dataset
+│ │ │ │ ├── cls
+│ ├── ade
+│ │ ├── ADEChallengeData2016
+│ │ │ ├── annotations
+│ │ │ │ ├── training
+│ │ │ │ ├── validation
+│ │ │ ├── images
+│ │ │ │ ├── training
+│ │ │ │ ├── validation
+
+```
+
+### Cityscapes
+The data could be found [here](https://www.cityscapes-dataset.com/downloads/) after registration.
+
+By convention, `**labelTrainIds.png` are used for cityscapes training.
+We provide a [script](../tools/convert_datasets/cityscapes.py) based on [cityscapesscripts](https://github.com/mcordts/cityscapesScripts)
+to generate `**labelTrainIds.png`.
+```shell
+# --nproc 8 means 8 processes for conversion, which could be omitted as well.
+python tools/convert_datasets/cityscapes.py data/cityscapes --nproc 8
+```
+
+### Pascal VOC
+Pascal VOC 2012 could be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar).
+Besides, most recent works on the Pascal VOC dataset usually exploit extra augmentation data, which could be found [here](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz).
+
+If you would like to use augmented VOC dataset, please run following command to convert augmentation annotations into proper format.
+```shell
+# --nproc 8 means 8 processes for conversion, which could be omitted as well.
+python tools/convert_datasets/voc_aug.py data/VOCdevkit data/VOCdevkit/VOCaug --nproc 8
+```
+
+Please refer to [concat dataset](tutorials/new_dataset.md#concatenate-dataset) for details about how to concatenate them and train them together.
+
+
+### ADE20K
+The training and validation sets of ADE20K could be downloaded from this [link](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).
+We may also download the test set from [here](http://data.csail.mit.edu/places/ADEchallenge/release_test.zip).
+
+## Inference with pretrained models
+
+We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),
+and also some high-level apis for easier integration to other projects.
+
+### Test a dataset
+
+- single GPU
+- single node multiple GPU
+- multiple node
+
+You can use the following commands to test a dataset.
+
+```shell
+# single-gpu testing
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] [--show]
+
+# multi-gpu testing
+./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
+```
+
+Optional arguments:
+- `RESULT_FILE`: Filename of the output results in pickle format. If not specified, the results will not be saved to a file.
+- `EVAL_METRICS`: Items to be evaluated on the results. Allowed values depend on the dataset, e.g., `mIoU` is available for all datasets. Cityscapes could be evaluated by `cityscapes` as well as the standard `mIoU` metric.
+- `--show`: If specified, segmentation results will be plotted on the images and shown in a new window. It is only applicable to single GPU testing and used for debugging and visualization. Please make sure that GUI is available in your environment, otherwise you may encounter the error like `cannot connect to X server`.
+- `--show-dir`: If specified, segmentation results will be plotted on the images and saved to the specified directory. It is only applicable to single GPU testing and used for debugging and visualization. You do NOT need a GUI available in your environment for using this option.
+
+
+Examples:
+
+Assume that you have already downloaded the checkpoints to the directory `checkpoints/`.
+
+1. Test PSPNet and visualize the results. Press any key for the next image.
+
+ ```shell
+ python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+ checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+ --show
+ ```
+
+2. Test PSPNet and save the painted images for later visualization.
+
+ ```shell
+ python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+ checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+ --show-dir psp_r50_512x1024_40ki_cityscapes_results
+ ```
+
+3. Test PSPNet on PASCAL VOC (without saving the test results) and evaluate the mIoU.
+
+ ```shell
+ python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_20k_voc12aug.py \
+ checkpoints/pspnet_r50-d8_512x1024_20k_voc12aug_20200605_003338-c57ef100.pth \
+ --eval mIoU
+ ```
+
+4. Test PSPNet with 8 GPUs, and evaluate the standard mIoU and cityscapes metric.
+
+ ```shell
+ ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+ checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+ 8 --out results.pkl --eval mIoU cityscapes
+ ```
+
+5. Test PSPNet on cityscapes test split with 8 GPUs, and generate the png files to be submitted to the official evaluation server.
+
+ ```shell
+ ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+ checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+ 8 --format-only --options "imgfile_prefix=./pspnet_test_results"
+ ```
+
+You will get png files under `./pspnet_test_results` directory.
+
+
+### Image demo
+
+We provide a demo script to test a single image.
+
+```shell
+python demo/image_demo.py ${IMAGE_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${DEVICE_NAME}] [--palette ${PALETTE}]
+```
+
+Examples:
+
+```shell
+python demo/image_demo.py demo/demo.jpg configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+ checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth --device cuda:0 --palette cityscapes
+```
+
+
+### High-level APIs for testing images
+
+Here is an example of building the model and testing given images.
+
+```python
+from mmseg.apis import inference_segmentor, init_segmentor
+import mmcv
+
+config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'
+checkpoint_file = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
+
+# build the model from a config file and a checkpoint file
+model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
+
+# test a single image and show the results
+img = 'test.jpg' # or img = mmcv.imread(img), which will only load it once
+result = inference_segmentor(model, img)
+# visualize the results in a new window
+model.show_result(img, result, show=True)
+# or save the visualization results to image files
+model.show_result(img, result, out_file='result.jpg')
+
+# test a video and show the results
+video = mmcv.VideoReader('video.mp4')
+for frame in video:
+ result = inference_segmentor(model, frame)
+ model.show_result(frame, result, wait_time=1)
+```
+
+A notebook demo can be found in [demo/inference_demo.ipynb](../demo/inference_demo.ipynb).
+
+
+## Train a model
+
+MMSegmentation implements distributed training and non-distributed training,
+which uses `MMDistributedDataParallel` and `MMDataParallel` respectively.
+
+All outputs (log files and checkpoints) will be saved to the working directory,
+which is specified by `work_dir` in the config file.
+
+By default we evaluate the model on the validation set after some iterations, you can change the evaluation interval by adding the interval argument in the training config.
+```python
+evaluation = dict(interval=4000) # This evaluates the model every 4000 iterations.
+```
+
+**\*Important\***: The default learning rate in config files is for 8 GPUs and 1 img/gpu (batch size = 8x1 = 8).
+Equivalently, you may also use 4 GPUs and 2 imgs/gpu since all models use cross-GPU SyncBN.
+
+### Train with a single GPU
+
+```shell
+python tools/train.py ${CONFIG_FILE} [optional arguments]
+```
+
+If you want to specify the working directory in the command, you can add an argument `--work_dir ${YOUR_WORK_DIR}`.
+
+### Train with multiple GPUs
+
+```shell
+./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
+```
+
+Optional arguments are:
+
+- `--no-validate` (**not suggested**): By default, the codebase will perform evaluation at every k iterations during the training. To disable this behavior, use `--no-validate`.
+- `--work-dir ${WORK_DIR}`: Override the working directory specified in the config file.
+- `--resume-from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file.
+
+Difference between `resume-from` and `load-from`:
+`resume-from` loads both the model weights and optimizer status, and the iteration number is also inherited from the specified checkpoint. It is usually used for resuming the training process that is interrupted accidentally.
+`load-from` only loads the model weights and the training iteration starts from 0. It is usually used for finetuning.
+
+### Train with multiple machines
+
+If you run MMSegmentation on a cluster managed with [slurm](https://slurm.schedmd.com/), you can use the script `slurm_train.sh`. (This script also supports single machine training.)
+
+```shell
+[GPUS=${GPUS}] ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} ${WORK_DIR}
+```
+
+Here is an example of using 16 GPUs to train PSPNet on the dev partition.
+
+```shell
+GPUS=16 ./tools/slurm_train.sh dev pspr50 configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py /nfs/xxxx/psp_r50_512x1024_40ki_cityscapes
+```
+
+You can check [slurm_train.sh](../tools/slurm_train.sh) for full arguments and environment variables.
+
+If you just have multiple machines connected with ethernet, you can refer to
+PyTorch [launch utility](https://pytorch.org/docs/stable/distributed_deprecated.html#launch-utility).
+Usually it is slow if you do not have high speed networking like InfiniBand.
+
+### Launch multiple jobs on a single machine
+
+If you launch multiple jobs on a single machine, e.g., 2 jobs of 4-GPU training on a machine with 8 GPUs,
+you need to specify different ports (29500 by default) for each job to avoid communication conflict.
+
+If you use `dist_train.sh` to launch training jobs, you can set the port in commands.
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG_FILE} 4
+CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG_FILE} 4
+```
+
+If you launch training jobs with Slurm, you need to modify the config files (usually the 6th line from the bottom in config files) to set different communication ports.
+
+In `config1.py`,
+```python
+dist_params = dict(backend='nccl', port=29500)
+```
+
+In `config2.py`,
+```python
+dist_params = dict(backend='nccl', port=29501)
+```
+
+Then you can launch two jobs with `config1.py` and `config2.py`.
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py ${WORK_DIR}
+CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py ${WORK_DIR}
+```
+
+Or you could specify the port with `--options dist_params.port=29501`.
+
+## Useful tools
+
+We provide lots of useful tools under `tools/` directory.
+
+### Get the FLOPs and params (experimental)
+
+We provide a script adapted from [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) to compute the FLOPs and params of a given model.
+
+```shell
+python tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}]
+```
+
+You will get the result like this.
+
+```
+==============================
+Input shape: (3, 2048, 1024)
+Flops: 1429.68 GMac
+Params: 48.98 M
+==============================
+```
+
+**Note**: This tool is still experimental and we do not guarantee that the number is correct. You may well use the result for simple comparisons, but double check it before you adopt it in technical reports or papers.
+
+(1) FLOPs are related to the input shape while parameters are not. The default input shape is (1, 3, 1280, 800).
+(2) Some operators are not counted into FLOPs like GN and custom operators.
+You can add support for new operators by modifying [`mmseg/utils/flops_counter.py`](../mmseg/utils/flops_counter.py).
+
+### Publish a model
+
+Before you upload a model to AWS, you may want to
+(1) convert model weights to CPU tensors, (2) delete the optimizer states and
+(3) compute the hash of the checkpoint file and append the hash id to the filename.
+
+```shell
+python tools/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME}
+```
+
+E.g.,
+
+```shell
+python tools/publish_model.py work_dirs/pspnet/latest.pth psp_r50_hszhao_200ep.pth
+```
+
+The final output filename will be `psp_r50_hszhao_200ep-{hash id}.pth`.
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000000..caa6677249
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,28 @@
+Welcome to MMSegmentation's documentation!
+==========================================
+
+.. toctree::
+ :maxdepth: 2
+
+ install.md
+ getting_started.md
+ config.md
+ model_zoo.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Tutorials
+
+ tutorials/index.rst
+
+.. toctree::
+ :caption: API Reference
+
+ api.rst
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 0000000000..5d6a2d9bc5
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,89 @@
+## Installation
+
+### Requirements
+
+- Linux (Windows is not officially supported)
+- Python 3.6+
+- PyTorch 1.3 or higher
+- [mmcv](https://github.com/open-mmlab/mmcv)
+
+### Install mmsegmentation
+
+a. Create a conda virtual environment and activate it.
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+```
+
+b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).
+Here we use PyTorch 1.5.0 and CUDA 10.1.
+You may also switch to another version by specifying the version number.
+
+```shell
+conda install pytorch=1.5.0 torchvision cudatoolkit=10.1 -c pytorch
+```
+
+c. Clone the mmsegmentation repository.
+
+```shell
+git clone http://github.com/open-mmlab/mmsegmentation
+cd mmsegmentation
+```
+
+d. Install [MMCV](https://mmcv.readthedocs.io/en/latest/).
+Either *mmcv* or *mmcv-full* is compatible with MMSegmentation, but for methods like CCNet and PSANet, the CUDA ops in *mmcv-full* are required.
+
+The pre-built *mmcv-full* can be installed by running the following command (available versions can be found [here](https://mmcv.readthedocs.io/en/latest/#install-with-pip)):
+```
+pip install mmcv-full==latest+torch1.5.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html
+```
+
+Optionally, you could also install lite version by running:
+```
+pip install mmcv
+```
+or build full version from source:
+```
+pip install mmcv-full
+```
+
+e. Install build requirements and then install MMSegmentation.
+
+```shell
+pip install -r requirements/build.txt # or "pip install -r requirements.txt" for everything.
+pip install -e . # or "python setup.py develop"
+```
+
+Note:
+
+1. The git commit id will be written to the version number with step *e*, e.g. 0.5.0+c415a2e. The version will also be saved in trained models.
+It is recommended that you run step *e* each time you pull some updates from github. If C++/CUDA codes are modified, then this step is compulsory.
+
+2. Following the above instructions, mmsegmentation is installed in `dev` mode, so any local modifications made to the code will take effect without the need to reinstall it (unless you submit some commits and want to update the version number).
+
+3. If you would like to use `opencv-python-headless` instead of `opencv-python`,
+you can install it before installing MMCV.
+
+4. Some dependencies are optional. Simply running `pip install -e .` will only install the minimum runtime requirements.
+To use optional dependencies like `cityscapesscripts`, either install them manually with `pip install -r requirements/optional.txt` or specify desired extras when calling `pip` (e.g. `pip install -e .[optional]`). Valid keys for the extras field are: `all`, `tests`, `build`, and `optional`.
+
+
+### A from-scratch setup script
+
+Here is a full script for setting up mmsegmentation with conda and linking the dataset path (supposing that your dataset path is $DATA_ROOT).
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+
+conda install pytorch=1.5.0 torchvision cudatoolkit=10.1 -c pytorch
+git clone http://github.com/open-mmlab/mmsegmentation
+cd mmsegmentation
+pip install mmcv-full==latest+torch1.5.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html
+pip install -r requirements/build.txt
+pip install -e .
+
+mkdir data
+ln -s $DATA_ROOT data
+```
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000000..922152e96a
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/model_zoo.json b/docs/model_zoo.json
new file mode 100644
index 0000000000..cc14cce043
--- /dev/null
+++ b/docs/model_zoo.json
@@ -0,0 +1,2724 @@
+{
+ "ccnet": {
+ "voc12aug": [
+ [
+ [
+ "CCNet",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "6.0",
+ 20.446969644812683,
+ 76.168,
+ 77.51245728562927,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212-fad81784.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "9.5",
+ 13.637111132708073,
+ 77.274,
+ 79.02193536016937,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212-0007b61d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 75.96300000000001,
+ 77.03666314173265,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127-c2a15f02.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 77.86800000000001,
+ 78.90226783309761,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127-c30da577.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "CCNet",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "6.0",
+ 3.321448861645321,
+ 77.757,
+ 78.87281569371032,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517-4123f401.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "9.5",
+ 2.3057084889880533,
+ 76.346,
+ 78.19477535704155,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540-a3b84ba6.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "6.8",
+ 1.4297640908184566,
+ 78.461,
+ 79.9288478571096,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125-76d11884.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "10.7",
+ 1.0054480750692631,
+ 76.941,
+ 78.62346948358564,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428-4f57c8d0.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 79.035,
+ 80.1605485551008,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421-869a3423.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.86800000000001,
+ 79.89770560760813,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935-ffae8917.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.295,
+ 81.07581708289482,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421-73eed8ca.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.449,
+ 80.65765062513057,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502-ad3cd481.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502.log.json)"
+ ]
+ ]
+ ],
+ "ade20k": [
+ [
+ [
+ "CCNet",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "8.8",
+ 20.889847025344185,
+ 41.776,
+ 42.980388602332184,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848-aa37f61e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "12.2",
+ 14.108705519350595,
+ 43.972,
+ 45.13437368692854,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848-1f4929a3.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.079,
+ 43.131354987778764,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435-7c97193b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435.log.json)"
+ ],
+ [
+ "CCNet",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 43.706,
+ 45.043400185988624,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644-e849e007.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644.log.json)"
+ ]
+ ]
+ ]
+ },
+ "ocrnet": {
+ "cityscapes": [
+ [
+ [
+ "OCRNet",
+ "HRNetV2p-W18-Small",
+ "512x1024",
+ 40000,
+ "3.5",
+ 10.452887853499684,
+ 74.30099999999999,
+ 75.94532264911325,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18",
+ "512x1024",
+ 40000,
+ "4.7",
+ 7.504321415510909,
+ 77.71900000000001,
+ 79.49233034088692,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W48",
+ "512x1024",
+ 40000,
+ "8.0",
+ 4.215373853142414,
+ 80.58,
+ 81.79213277409706,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18-Small",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 77.157,
+ 78.66157171766707,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.568,
+ 80.45534029123633,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W48",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 80.704,
+ 81.87462053536443,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18-Small",
+ "512x1024",
+ 160000,
+ "-",
+ "-",
+ 78.448,
+ 79.9684406563932,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18",
+ "512x1024",
+ 160000,
+ "-",
+ "-",
+ 79.473,
+ 80.91408916940453,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W48",
+ "512x1024",
+ 160000,
+ "-",
+ "-",
+ 81.34599999999999,
+ 82.69728960882979,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json)"
+ ]
+ ]
+ ],
+ "voc12aug": [
+ [
+ [
+ "OCRNet",
+ "HRNetV2p-W18-Small",
+ "512x512",
+ 20000,
+ "3.5",
+ 31.554844022107428,
+ 71.7,
+ 73.83921653423745,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18",
+ "512x512",
+ 20000,
+ "4.7",
+ 19.90720967998522,
+ 74.749,
+ 77.1105042314631,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W48",
+ "512x512",
+ 20000,
+ "8.1",
+ 17.82942134961672,
+ 77.72,
+ 79.87183377075576,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18-Small",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 72.761,
+ 74.6014601681293,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 74.982,
+ 77.39817842813225,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W48",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 77.143,
+ 79.70754598517257,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json)"
+ ]
+ ]
+ ],
+ "ade20k": [
+ [
+ [
+ "OCRNet",
+ "HRNetV2p-W18-Small",
+ "512x512",
+ 80000,
+ "6.7",
+ 28.980094398974657,
+ 35.056,
+ 35.797050387137105,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18",
+ "512x512",
+ 80000,
+ "7.9",
+ 18.928971854245283,
+ 37.789,
+ 39.155377232744,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W48",
+ "512x512",
+ 80000,
+ "11.2",
+ 16.991178423144667,
+ 43.0,
+ 44.299600723103225,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18-Small",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 37.191,
+ 38.40331034259458,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W18",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 39.322,
+ 40.80220494656125,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json)"
+ ],
+ [
+ "OCRNet",
+ "HRNetV2p-W48",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 43.254,
+ 44.87655360616251,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json)"
+ ]
+ ]
+ ]
+ },
+ "fcn": {
+ "ade20k": [
+ [
+ [
+ "FCN",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "8.5",
+ 23.4864501408415,
+ 35.94,
+ 37.93716647334422,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json)"
+ ],
+ [
+ "FCN",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "12.0",
+ 14.780950192500319,
+ 39.614,
+ 40.827367113582405,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json)"
+ ],
+ [
+ "FCN",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 36.105,
+ 38.078772436420934,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json)"
+ ],
+ [
+ "FCN",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 39.914,
+ 41.39843118160508,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "FCN",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "5.7",
+ 4.169686275718568,
+ 72.246,
+ 73.35990418338677,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json)"
+ ],
+ [
+ "FCN",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "9.2",
+ 2.6579467518998623,
+ 75.44999999999999,
+ 76.58052712300109,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json)"
+ ],
+ [
+ "FCN",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "6.5",
+ 1.796476680257555,
+ 71.47099999999999,
+ 72.53595679063739,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json)"
+ ],
+ [
+ "FCN",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "10.4",
+ 1.1858280952855258,
+ 73.929,
+ 75.13723386002961,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json)"
+ ],
+ [
+ "FCN",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 73.61,
+ 74.23620409061135,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json)"
+ ],
+ [
+ "FCN",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 75.13300000000001,
+ 75.93619310604196,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json)"
+ ],
+ [
+ "FCN",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 72.643,
+ 73.31626041581089,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json)"
+ ],
+ [
+ "FCN",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 75.519,
+ 76.60857360886911,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json)"
+ ]
+ ]
+ ],
+ "voc12aug": [
+ [
+ [
+ "FCN",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "5.7",
+ 23.28063707693325,
+ 67.085,
+ 69.9383574378687,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json)"
+ ],
+ [
+ "FCN",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "9.2",
+ 14.80917380811037,
+ 71.16,
+ 73.56633022724682,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json)"
+ ],
+ [
+ "FCN",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 66.971,
+ 69.03918575643368,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json)"
+ ],
+ [
+ "FCN",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 69.90899999999999,
+ 72.3822015171163,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json)"
+ ]
+ ]
+ ]
+ },
+ "upernet": {
+ "cityscapes": [
+ [
+ [
+ "UPerNet",
+ "R-50",
+ "512x1024",
+ 40000,
+ "6.4",
+ 4.250181636943019,
+ 77.096,
+ 78.3708722046974,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-101",
+ "512x1024",
+ 40000,
+ "7.4",
+ 3.7930291329191848,
+ 78.689,
+ 80.10934950511658,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-50",
+ "769x769",
+ 40000,
+ "7.2",
+ 1.7640659185483825,
+ 77.97699999999999,
+ 79.70262909350413,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-101",
+ "769x769",
+ 40000,
+ "8.4",
+ 1.5620856953198976,
+ 79.03099999999999,
+ 80.76684306267266,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-50",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.193,
+ 79.18874361591651,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-101",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 79.396,
+ 80.45737464738971,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-50",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.389,
+ 80.91628431360874,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-101",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 80.096,
+ 81.49282902181865,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json)"
+ ]
+ ]
+ ],
+ "voc12aug": [
+ [
+ [
+ "UPerNet",
+ "R-50",
+ "512x512",
+ 20000,
+ "6.4",
+ 23.173912855179744,
+ 74.823,
+ 76.3452508971145,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-101",
+ "512x512",
+ 20000,
+ "7.5",
+ 19.980025806149488,
+ 77.096,
+ 78.28805607216208,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-50",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 75.921,
+ 77.43852589851066,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-101",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 77.432,
+ 78.55592324577675,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json)"
+ ]
+ ]
+ ],
+ "ade20k": [
+ [
+ [
+ "UPerNet",
+ "R-50",
+ "512x512",
+ 80000,
+ "8.1",
+ 23.404934213597443,
+ 40.704,
+ 41.80915610272295,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-101",
+ "512x512",
+ 80000,
+ "9.1",
+ 20.336544682582634,
+ 42.91,
+ 43.95794963214672,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-50",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.05,
+ 42.784926632807014,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json)"
+ ],
+ [
+ "UPerNet",
+ "R-101",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 43.824999999999996,
+ 44.84822175137515,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json)"
+ ]
+ ]
+ ]
+ },
+ "encnet": {
+ "ade20k": [
+ [
+ [
+ "encnet",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "10.1",
+ 22.81234693333879,
+ 39.53,
+ 41.174465044693534,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json)"
+ ],
+ [
+ "encnet",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "13.6",
+ 14.8713593833497,
+ 42.108000000000004,
+ 43.60970109562513,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json)"
+ ],
+ [
+ "encnet",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 40.096,
+ 41.71317203062112,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json)"
+ ],
+ [
+ "encnet",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.61,
+ 44.01125617918497,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "encnet",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "8.6",
+ 4.579766763724604,
+ 75.672,
+ 77.08129779577173,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json)"
+ ],
+ [
+ "encnet",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "12.1",
+ 2.6579084094229293,
+ 75.81099999999999,
+ 77.20722229497062,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json)"
+ ],
+ [
+ "encnet",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "9.8",
+ 1.815523577456311,
+ 76.244,
+ 77.8544931191627,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json)"
+ ],
+ [
+ "encnet",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "13.7",
+ 1.2586694727171592,
+ 74.248,
+ 76.2517491915298,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json)"
+ ],
+ [
+ "encnet",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 77.93900000000001,
+ 79.12604738206694,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json)"
+ ],
+ [
+ "encnet",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.55499999999999,
+ 79.46808306901366,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json)"
+ ],
+ [
+ "encnet",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 77.444,
+ 78.71857972971966,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json)"
+ ],
+ [
+ "encnet",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 76.099,
+ 76.97183988185541,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json)"
+ ]
+ ]
+ ]
+ },
+ "psanet": {
+ "voc12aug": [
+ [
+ [
+ "PSANet",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "6.9",
+ 18.243332440478824,
+ 76.393,
+ 77.3406686160825,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "10.4",
+ 12.62519076615176,
+ 77.90700000000001,
+ 79.30352112599553,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.305,
+ 77.35064085992029,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 77.73400000000001,
+ 79.0523901742458,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "PSANet",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "7.0",
+ 3.1667705694500796,
+ 77.628,
+ 79.0422496865149,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "10.5",
+ 2.2037021448307477,
+ 79.13900000000001,
+ 80.19378817152979,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "7.9",
+ 1.4027260879051224,
+ 77.991,
+ 79.63690034167004,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "11.9",
+ 0.9842023985382289,
+ 78.432,
+ 80.26050149532226,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 77.239,
+ 78.69336591221833,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 79.315,
+ 80.53461181920574,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.315,
+ 80.91271206157141,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.687,
+ 80.88962393764702,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json)"
+ ]
+ ]
+ ],
+ "ade20k": [
+ [
+ [
+ "PSANet",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "9.0",
+ 18.906812073042055,
+ 41.141,
+ 41.91306881085375,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "12.5",
+ 13.12564520230877,
+ 43.797999999999995,
+ 44.751736929040355,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 41.668,
+ 42.950020936188984,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json)"
+ ],
+ [
+ "PSANet",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 43.742999999999995,
+ 45.37610124877713,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json)"
+ ]
+ ]
+ ]
+ },
+ "danet": {
+ "voc12aug": [
+ [
+ [
+ "DANet",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "6.5",
+ 20.943311686542472,
+ 74.455,
+ 75.68810367906634,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026-9e9e3ab3.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026.log.json)"
+ ],
+ [
+ "DANet",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "9.9",
+ 13.758801929101844,
+ 76.024,
+ 77.22605579984322,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026-d48d23b2.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026.log.json)"
+ ],
+ [
+ "DANet",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.371,
+ 77.29119104649632,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526-426e3a64.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526.log.json)"
+ ],
+ [
+ "DANet",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.508,
+ 77.31718399039389,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031-788e232a.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031.log.json)"
+ ]
+ ]
+ ],
+ "ade20k": [
+ [
+ [
+ "DANet",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "11.5",
+ 21.197953173726543,
+ 41.662,
+ 42.90219783063448,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125.log.json)"
+ ],
+ [
+ "DANet",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "15.0",
+ 14.176784169645225,
+ 43.645,
+ 45.19098849554861,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126-d0357c73.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126.log.json)"
+ ],
+ [
+ "DANet",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.449999999999996,
+ 43.251880532863545,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340-9cb35dcd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340.log.json)"
+ ],
+ [
+ "DANet",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 44.171,
+ 45.016860694179314,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348-23bf12f9.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "DANet",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "7.4",
+ 2.655504792992914,
+ 78.741,
+ "-",
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324-c0dbfa5f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324.log.json)"
+ ],
+ [
+ "DANet",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "10.9",
+ 1.9939886829099438,
+ 80.521,
+ "-",
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831-c57a7157.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831.log.json)"
+ ],
+ [
+ "DANet",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "8.8",
+ 1.5557926799730137,
+ 78.88,
+ 80.61866776927825,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703-76681c60.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703.log.json)"
+ ],
+ [
+ "DANet",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "12.8",
+ 1.0655867297959223,
+ 79.88199999999999,
+ 81.46525733416875,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717-dcb7fd4e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717.log.json)"
+ ],
+ [
+ "DANet",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 79.336,
+ "-",
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029-2bfa2293.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029.log.json)"
+ ],
+ [
+ "DANet",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 80.413,
+ "-",
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918-955e6350.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918.log.json)"
+ ],
+ [
+ "DANet",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.274,
+ 80.96441839831498,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954-495689b4.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954.log.json)"
+ ],
+ [
+ "DANet",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 80.471,
+ 82.020171090948,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918-f3a929e7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918.log.json)"
+ ]
+ ]
+ ]
+ },
+ "hrnet": {
+ "voc12aug": [
+ [
+ [
+ "FCN",
+ "HRNetV2p-W18-Small",
+ "512x512",
+ 20000,
+ "1.8",
+ 43.364505532130885,
+ 65.201,
+ 68.55284135943813,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20200617_224503-56e36088.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20200617_224503.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18",
+ "512x512",
+ 20000,
+ "2.9",
+ 23.482760884011036,
+ 72.303,
+ 74.70589725240711,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W48",
+ "512x512",
+ 20000,
+ "6.2",
+ 22.047745500601465,
+ 75.87,
+ 78.57597654496765,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18-Small",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 66.61200000000001,
+ 70.0031319918366,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 72.904,
+ 75.58601750093821,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W48",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.237,
+ 78.48754167864209,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "FCN",
+ "HRNetV2p-W18-Small",
+ "512x1024",
+ 40000,
+ "1.7",
+ 23.74297838183743,
+ 73.859,
+ 75.90997145624684,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18",
+ "512x1024",
+ 40000,
+ "2.9",
+ 12.96853348364565,
+ 77.188,
+ 78.91665724639267,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W48",
+ "512x1024",
+ 40000,
+ "6.2",
+ 6.421700443191522,
+ 78.483,
+ 79.69458922303686,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18-Small",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 75.306,
+ 77.47890927385332,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.64999999999999,
+ 80.35059171130018,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W48",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 79.928,
+ 80.71977327982115,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18-Small",
+ "512x1024",
+ 160000,
+ "-",
+ "-",
+ 76.312,
+ 78.31118288010825,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18",
+ "512x1024",
+ 160000,
+ "-",
+ "-",
+ 78.797,
+ 80.74329822797024,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W48",
+ "512x1024",
+ 160000,
+ "-",
+ "-",
+ 80.651,
+ 81.92482068666172,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json)"
+ ]
+ ]
+ ],
+ "ade20k": [
+ [
+ [
+ "FCN",
+ "HRNetV2p-W18-Small",
+ "512x512",
+ 80000,
+ "3.8",
+ 38.65539699852906,
+ 31.384,
+ 32.452806656988855,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18",
+ "512x512",
+ 80000,
+ "4.9",
+ 22.569194335083992,
+ 35.515,
+ 36.804822066348805,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20200614_185145-66f20cb7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20200614_185145.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W48",
+ "512x512",
+ 80000,
+ "8.2",
+ 21.234892591194093,
+ 41.897,
+ 43.26888294374513,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18-Small",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 32.995000000000005,
+ 34.547964211800654,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20200614_214413-870f65ac.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20200614_214413.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W18",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 36.786,
+ 38.58485577782462,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json)"
+ ],
+ [
+ "FCN",
+ "HRNetV2p-W48",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.018,
+ 43.86047333076445,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json)"
+ ]
+ ]
+ ]
+ },
+ "ann": {
+ "ade20k": [
+ [
+ [
+ "ANN",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "9.1",
+ 21.01150654479224,
+ 41.008,
+ 42.299370248011755,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818-26f75e11.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818.log.json)"
+ ],
+ [
+ "ANN",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "12.5",
+ 14.116439500308603,
+ 42.939,
+ 44.180116688803125,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818-c0153543.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818.log.json)"
+ ],
+ [
+ "ANN",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 41.744,
+ 42.61939537832803,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733-892247bc.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733.log.json)"
+ ],
+ [
+ "ANN",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.936,
+ 44.05749393457835,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733-955eb1ec.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "ANN",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "6.0",
+ 3.7066015347562153,
+ 77.402,
+ 78.56666466963291,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211.log.json)"
+ ],
+ [
+ "ANN",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "9.5",
+ 2.5468121299522504,
+ 76.553,
+ 78.85000230335912,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243-adf6eece.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243.log.json)"
+ ],
+ [
+ "ANN",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "6.8",
+ 1.6951337367703907,
+ 78.89399999999999,
+ 80.45833256780746,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712-2b46b04d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712.log.json)"
+ ],
+ [
+ "ANN",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "10.7",
+ 1.1484480822281227,
+ 79.325,
+ 80.94411938511638,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720-059bff28.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720.log.json)"
+ ],
+ [
+ "ANN",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 77.345,
+ 78.65222072634322,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911-5a9ad545.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911.log.json)"
+ ],
+ [
+ "ANN",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 77.137,
+ 78.81361594500169,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728-aceccc6e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728.log.json)"
+ ],
+ [
+ "ANN",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 78.879,
+ 80.5665089108356,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426-cc7ff323.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426.log.json)"
+ ],
+ [
+ "ANN",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 78.803,
+ 80.34287446616453,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713-a9d4be8d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713.log.json)"
+ ]
+ ]
+ ],
+ "voc12aug": [
+ [
+ [
+ "ANN",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "6.0",
+ 20.919551932584206,
+ 74.86,
+ 76.12674212435266,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246-dfcb1c62.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246.log.json)"
+ ],
+ [
+ "ANN",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "9.5",
+ 13.944150769190673,
+ 77.47,
+ 78.69711736662727,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246-2fad0042.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246.log.json)"
+ ],
+ [
+ "ANN",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.556,
+ 77.5139251733015,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314-b5dac322.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314.log.json)"
+ ],
+ [
+ "ANN",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.69500000000001,
+ 78.05579776330663,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314-bd205bbe.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314.log.json)"
+ ]
+ ]
+ ]
+ },
+ "pspnet": {
+ "ade20k": [
+ [
+ [
+ "PSPNet",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "8.5",
+ 23.526579373672153,
+ 41.134,
+ 41.941132390638955,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "12.0",
+ 15.301938618847755,
+ 43.57,
+ 44.354975719492394,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.477,
+ 43.441892719742064,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 44.39,
+ 45.34825070704653,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "PSPNet",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "6.1",
+ 4.072768293326251,
+ 77.848,
+ 79.18377782829393,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "9.6",
+ 2.6817753401497195,
+ 78.34,
+ 79.74414521564499,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "6.9",
+ 1.7590560538055864,
+ 78.262,
+ 79.88301952959716,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "10.9",
+ 1.1539857289832562,
+ 79.082,
+ 80.2847015735947,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.55199999999999,
+ 79.79089188640063,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 79.756,
+ 81.01164255858869,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.58800000000001,
+ 80.68588581173638,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.77499999999999,
+ 81.05734239329955,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json)"
+ ]
+ ]
+ ],
+ "voc12aug": [
+ [
+ [
+ "PSPNet",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "6.1",
+ 23.594295286990285,
+ 76.778,
+ 77.61449930304435,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "9.6",
+ 15.016859227435978,
+ 78.472,
+ 79.24767235924098,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 77.294,
+ 78.48376581837772,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json)"
+ ],
+ [
+ "PSPNet",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 78.524,
+ 79.56722327765866,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json)"
+ ]
+ ]
+ ]
+ },
+ "deeplabv3": {
+ "voc12aug": [
+ [
+ [
+ "DeepLabV3",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "6.1",
+ 13.882586968538902,
+ 76.17,
+ 77.42428903363798,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "9.6",
+ 9.81331369081087,
+ 78.704,
+ 79.9523799897917,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 77.676,
+ 78.78389817782097,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 77.923,
+ 79.17787250140825,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "DeepLabV3",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "6.1",
+ 2.57047659861635,
+ 79.091,
+ 80.4506523590434,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "9.6",
+ 1.9222440928636317,
+ 77.121,
+ 79.61407891260694,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "6.9",
+ 1.1119590479409436,
+ 78.581,
+ 79.89433614719104,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "10.9",
+ 0.832582701195375,
+ 79.27300000000001,
+ 80.11177730128428,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 79.31700000000001,
+ 80.56867900987751,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 80.19500000000001,
+ 81.21365141510776,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.893,
+ 81.0599984851973,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.668,
+ 80.81226045958836,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json)"
+ ]
+ ]
+ ],
+ "ade20k": [
+ [
+ [
+ "DeepLabV3",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "8.9",
+ 14.763588319372595,
+ 42.422,
+ 43.27846378978279,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "12.4",
+ 10.144087811258307,
+ 44.080999999999996,
+ 45.19313139034226,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.657000000000004,
+ 44.08872105809725,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json)"
+ ],
+ [
+ "DeepLabV3",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 44.999,
+ 46.65804362786369,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json)"
+ ]
+ ]
+ ]
+ },
+ "nonlocal_net": {
+ "ade20k": [
+ [
+ [
+ "NonLocal",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "9.1",
+ 21.37048896225747,
+ 40.752,
+ 42.053557458158075,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "12.6",
+ 13.965079302337752,
+ 42.896,
+ 44.26894963193766,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.028,
+ 43.03561642742581,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 43.361,
+ 44.82970596168541,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20200616_003422-affd0f8d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20200616_003422.log.json)"
+ ]
+ ]
+ ],
+ "voc12aug": [
+ [
+ [
+ "NonLocal",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "6.4",
+ 21.213895119736144,
+ 76.199,
+ 77.11520756528137,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "9.8",
+ 14.009331593316489,
+ 78.146,
+ 78.86433067761453,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.646,
+ 77.47020448125416,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 78.27300000000001,
+ 79.11788410243086,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "NonLocal",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "7.4",
+ 2.7236662742933437,
+ 78.237,
+ "-",
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "10.9",
+ 1.9480966751075284,
+ 78.657,
+ "-",
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "8.9",
+ 1.5246259413816563,
+ 78.327,
+ 79.92096670245425,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "12.8",
+ 1.0477751460724616,
+ 78.569,
+ 80.29003703614515,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.009,
+ "-",
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.93,
+ "-",
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.052,
+ 80.67913947439877,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json)"
+ ],
+ [
+ "NonLocal",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.4,
+ 80.85278857807543,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json)"
+ ]
+ ]
+ ]
+ },
+ "gcnet": {
+ "voc12aug": [
+ [
+ [
+ "GCNet",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "5.8",
+ 23.350259534912006,
+ 76.42099999999999,
+ 77.50740243914798,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "9.2",
+ 14.799942609024914,
+ 77.40700000000001,
+ 78.56005567821165,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.238,
+ 77.63464439678829,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 77.84299999999999,
+ 78.59489046439079,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json)"
+ ]
+ ]
+ ],
+ "ade20k": [
+ [
+ [
+ "GCNet",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "8.5",
+ 23.37990361060126,
+ 41.465999999999994,
+ 42.853494172834885,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "12.0",
+ 15.198333955746829,
+ 42.824,
+ 44.54431618918491,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 42.367,
+ 43.51941132800723,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 43.686,
+ 45.21077897100608,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "GCNet",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "5.8",
+ 3.9294375140356674,
+ 77.691,
+ 78.55901060780846,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "9.2",
+ 2.613929250881175,
+ 78.276,
+ 79.34154953801408,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "6.5",
+ 1.6665314351879814,
+ 78.117,
+ 80.08636386919896,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "10.5",
+ 1.130548704280006,
+ 78.949,
+ 80.70740508232963,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 78.481,
+ 80.00715692663934,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 79.02900000000001,
+ 79.8389342161561,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 78.682,
+ 80.66434566958863,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json)"
+ ],
+ [
+ "GCNet",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.184,
+ 80.70740508232963,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json)"
+ ]
+ ]
+ ]
+ },
+ "deeplabv3plus": {
+ "ade20k": [
+ [
+ [
+ "DeepLabV3+",
+ "R-50-D8",
+ "512x512",
+ 80000,
+ "10.6",
+ 21.009967570414005,
+ 42.725,
+ 43.750872665309245,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-101-D8",
+ "512x512",
+ 80000,
+ "14.1",
+ 14.156578683381744,
+ 44.604,
+ 46.057602920856496,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-50-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 43.952999999999996,
+ 44.9257356479825,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-101-D8",
+ "512x512",
+ 160000,
+ "-",
+ "-",
+ 45.467999999999996,
+ 46.35142741219229,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json)"
+ ]
+ ]
+ ],
+ "voc12aug": [
+ [
+ [
+ "DeepLabV3+",
+ "R-50-D8",
+ "512x512",
+ 20000,
+ "7.6",
+ 20.995826216517777,
+ 75.932,
+ 77.49501357998696,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-101-D8",
+ "512x512",
+ 20000,
+ "11.0",
+ 13.877644753051397,
+ 77.216,
+ 78.59404066425819,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-50-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 76.80799999999999,
+ 77.56956435172417,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-101-D8",
+ "512x512",
+ 40000,
+ "-",
+ "-",
+ 78.618,
+ 79.5312727643948,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json)"
+ ]
+ ]
+ ],
+ "cityscapes": [
+ [
+ [
+ "DeepLabV3+",
+ "R-50-D8",
+ "512x1024",
+ 40000,
+ "7.5",
+ 3.937852781596224,
+ 79.606,
+ 81.0126987140963,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-101-D8",
+ "512x1024",
+ 40000,
+ "11.0",
+ 2.6029196398088135,
+ 80.208,
+ 81.81580429286755,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-50-D8",
+ "769x769",
+ 40000,
+ "8.5",
+ 1.7219797309503193,
+ 78.972,
+ 80.46092552803746,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-101-D8",
+ "769x769",
+ 40000,
+ "12.5",
+ 1.1546806682489152,
+ 79.461,
+ 80.5005593465169,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-50-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 80.08800000000001,
+ 81.13450865498024,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-101-D8",
+ "512x1024",
+ 80000,
+ "-",
+ "-",
+ 80.972,
+ 82.02915734982798,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-50-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 79.827,
+ 81.47591334418544,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json)"
+ ],
+ [
+ "DeepLabV3+",
+ "R-101-D8",
+ "769x769",
+ 80000,
+ "-",
+ "-",
+ 80.97999999999999,
+ 82.17610990719812,
+ "[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405-a7573d20.pth) | [log](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmsegmentation/models/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405.log.json)"
+ ]
+ ]
+ ]
+ }
+}
diff --git a/docs/model_zoo.md b/docs/model_zoo.md
new file mode 100644
index 0000000000..8f39928e76
--- /dev/null
+++ b/docs/model_zoo.md
@@ -0,0 +1,107 @@
+# Benchmark and Model Zoo
+
+## Common settings
+
+* We use distributed training with 4 GPUs by default.
+* All pytorch-style pretrained backbones on ImageNet are trained by ourselves, following the same procedure as in the [paper](https://arxiv.org/pdf/1812.01187.pdf).
+Our ResNet-style backbones are based on the ResNetV1c variant, where the 7x7 conv in the input stem is replaced with three 3x3 convs.
+* For consistency across different hardware, we report the GPU memory as the maximum value of `torch.cuda.max_memory_allocated()` for all 4 GPUs with `torch.backends.cudnn.benchmark=False`.
+ Note that this value is usually less than what `nvidia-smi` shows.
+* We report the inference time as the total time of network forwarding and post-processing, excluding the data loading time.
+Results are obtained with the script `tools/benchmark.py` which computes the average time on 200 images with `torch.backends.cudnn.benchmark=False`.
+* There are two inference modes in this framework.
+ * `slide` mode: The `test_cfg` will be like `dict(mode='slide', crop_size=(769, 769), stride=(513, 513))`.
+
+ In this mode, multiple patches will be cropped from input image, passed into network individually.
+ The crop size and stride between patches are specified by `crop_size` and `stride`.
+ The overlapping area will be merged by averaging.
+ * `whole` mode: The `test_cfg` will be like `dict(mode='whole')`.
+
+ In this mode, the whole image will be passed into network directly.
+* For input size of 8x+1 (e.g. 769), `align_corners=True` is adopted as a traditional practice.
+Otherwise, for input size of 8x (e.g. 512, 1024), `align_corners=False` is adopted.
+
+## Baselines
+
+### FCN
+
+Please refer to [FCN](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) for details.
+
+### PSPNet
+
+Please refer to [PSPNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) for details.
+
+### DeepLabV3
+
+Please refer to [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) for details.
+
+### PSANet
+
+Please refer to [PSANet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/psanet) for details.
+
+### DeepLabV3+
+
+Please refer to [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) for details.
+
+### UPerNet
+
+Please refer to [UPerNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) for details.
+
+### NonLocal Net
+
+Please refer to [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) for details.
+
+### CCNet
+
+Please refer to [CCNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ccnet) for details.
+
+### DANet
+
+Please refer to [DANet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) for details.
+
+### HRNet
+
+Please refer to [HRNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/hrnet) for details.
+
+### GCNet
+
+Please refer to [GCNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) for details.
+
+### ANN
+
+Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) for details.
+
+### OCRNet
+
+Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) for details.
+
+## Speed benchmark
+
+### Hardware
+
+- 8 NVIDIA Tesla V100 (32G) GPUs
+- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
+
+### Software environment
+
+- Python 3.7
+- PyTorch 1.5
+- CUDA 10.1
+- CUDNN 7.6.03
+- NCCL 2.4.08
+
+### Training speed
+
+For fair comparison, we benchmark all implementations with ResNet-101V1c.
+The input size is fixed to 1024x512 with batch size 2.
+
+The training speed is reported as follows, in terms of seconds per iteration (s/iter). The lower, the better.
+
+| Implementation | PSPNet (s/iter) | DeepLabV3+ (s/iter) |
+|----------------|-----------------|---------------------|
+| [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) | **0.83** | **0.85** |
+| [SegmenTron](https://github.com/LikeLy-Journey/SegmenTron) | 0.84 | 0.85 |
+| [CSAILVision](https://github.com/CSAILVision/semantic-segmentation-pytorch) | 1.15 | N/A |
+| [vedaseg](https://github.com/Media-Smart/vedaseg) | 0.95 | 1.25 |
+
+Note: The output stride of DeepLabV3+ is 8.
diff --git a/docs/tutorials/data_pipeline.md b/docs/tutorials/data_pipeline.md
new file mode 100644
index 0000000000..825260d32c
--- /dev/null
+++ b/docs/tutorials/data_pipeline.md
@@ -0,0 +1,156 @@
+# 2. Custom Data Pipelines
+
+## Design of Data pipelines
+
+Following typical conventions, we use `Dataset` and `DataLoader` for data loading
+with multiple workers. `Dataset` returns a dict of data items corresponding to
+the arguments of the model's forward method.
+Since the data in semantic segmentation may not be the same size,
+we introduce a new `DataContainer` type in MMCV to help collect and distribute
+data of different size.
+See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
+
+The data preparation pipeline and the dataset are decoupled. Usually a dataset
+defines how to process the annotations and a data pipeline defines all the steps to prepare a data dict.
+A pipeline consists of a sequence of operations. Each operation takes a dict as input and outputs a dict for the next transform.
+
+The operations are categorized into data loading, pre-processing, formatting and test-time augmentation.
+
+Here is a pipeline example for PSPNet.
+
+```python
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 1024)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations'),
+ dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 1024),
+ # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+```
+
+For each operation, we list the related dict fields that are added/updated/removed.
+
+### Data loading
+
+`LoadImageFromFile`
+- add: img, img_shape, ori_shape
+
+`LoadAnnotations`
+- add: gt_semantic_seg, seg_fields
+
+### Pre-processing
+
+`Resize`
+- add: scale, scale_idx, pad_shape, scale_factor, keep_ratio
+- update: img, img_shape, *seg_fields
+
+`RandomFlip`
+- add: flip
+- update: img, *seg_fields
+
+`Pad`
+- add: pad_fixed_size, pad_size_divisor
+- update: img, pad_shape, *seg_fields
+
+`RandomCrop`
+- update: img, pad_shape, *seg_fields
+
+`Normalize`
+- add: img_norm_cfg
+- update: img
+
+`SegRescale`
+- update: gt_semantic_seg
+
+`PhotoMetricDistortion`
+- update: img
+
+### Formatting
+
+`ToTensor`
+- update: specified by `keys`.
+
+`ImageToTensor`
+- update: specified by `keys`.
+
+`Transpose`
+- update: specified by `keys`.
+
+`ToDataContainer`
+- update: specified by `fields`.
+
+`DefaultFormatBundle`
+- update: img, gt_semantic_seg
+
+`Collect`
+- add: img_metas (the keys of img_metas are specified by `meta_keys`)
+- remove: all other keys except for those specified by `keys`
+
+### Test time augmentation
+
+`MultiScaleFlipAug`
+
+## Extend and use custom pipelines
+
+1. Write a new pipeline in any file, e.g., `my_pipeline.py`. It takes a dict as input and returns a dict.
+
+ ```python
+ from mmseg.datasets import PIPELINES
+
+ @PIPELINES.register_module()
+ class MyTransform:
+
+ def __call__(self, results):
+ results['dummy'] = True
+ return results
+ ```
+
+2. Import the new class.
+
+ ```python
+ from .my_pipeline import MyTransform
+ ```
+
+3. Use it in config files.
+
+ ```python
+ img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+ crop_size = (512, 1024)
+ train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations'),
+ dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='MyTransform'),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+ ]
+ ```
diff --git a/docs/tutorials/index.rst b/docs/tutorials/index.rst
new file mode 100644
index 0000000000..778191bb43
--- /dev/null
+++ b/docs/tutorials/index.rst
@@ -0,0 +1,6 @@
+.. toctree::
+ :maxdepth: 2
+
+ new_dataset.md
+ data_pipeline.md
+ new_modules.md
diff --git a/docs/tutorials/new_dataset.md b/docs/tutorials/new_dataset.md
new file mode 100644
index 0000000000..0ad1019e0e
--- /dev/null
+++ b/docs/tutorials/new_dataset.md
@@ -0,0 +1,161 @@
+# 1. Adding New Dataset
+
+## Customize datasets by reorganizing data
+
+The simplest way is to reorganize your dataset into the folder structure shown below.
+
+An example of the file structure is as follows.
+```
+├── data
+│ ├── my_dataset
+│ │ ├── img_dir
+│ │ │ ├── train
+│ │ │ │ ├── xxx{img_suffix}
+│ │ │ │ ├── yyy{img_suffix}
+│ │ │ │ ├── zzz{img_suffix}
+│ │ │ ├── val
+│ │ ├── ann_dir
+│ │ │ ├── train
+│ │ │ │ ├── xxx{seg_map_suffix}
+│ │ │ │ ├── yyy{seg_map_suffix}
+│ │ │ │ ├── zzz{seg_map_suffix}
+│ │ │ ├── val
+
+```
+A training pair consists of the files in img_dir/ann_dir that share the same prefix (file name without suffix).
+
+If `split` argument is given, only part of the files in img_dir/ann_dir will be loaded.
+We may specify the prefix of files we would like to be included in the split txt.
+
+More specifically, for a split txt like following,
+```
+xxx
+zzz
+```
+Only
+`data/my_dataset/img_dir/train/xxx{img_suffix}`,
+`data/my_dataset/img_dir/train/zzz{img_suffix}`,
+`data/my_dataset/ann_dir/train/xxx{seg_map_suffix}`,
+`data/my_dataset/ann_dir/train/zzz{seg_map_suffix}` will be loaded.
+
+## Customize datasets by mixing dataset
+
+MMSegmentation also supports mixing datasets for training.
+Currently it supports concatenating and repeating datasets.
+
+### Repeat dataset
+
+We use `RepeatDataset` as wrapper to repeat the dataset.
+For example, suppose the original dataset is `Dataset_A`, to repeat it, the config looks like the following
+```python
+dataset_A_train = dict(
+ type='RepeatDataset',
+ times=N,
+ dataset=dict( # This is the original config of Dataset_A
+ type='Dataset_A',
+ ...
+ pipeline=train_pipeline
+ )
+ )
+```
+
+### Concatenate dataset
+
+There are 2 ways to concatenate datasets.
+
+1. If the datasets you want to concatenate are in the same type with different annotation files,
+ you can concatenate the dataset configs like the following.
+
+ 1. You may concatenate two `ann_dir`.
+ ```python
+ dataset_A_train = dict(
+ type='Dataset_A',
+ img_dir = 'img_dir',
+ ann_dir = ['anno_dir_1', 'anno_dir_2'],
+ pipeline=train_pipeline
+ )
+ ```
+ 2. You may concatenate two `split`.
+
+ ```python
+ dataset_A_train = dict(
+ type='Dataset_A',
+ img_dir = 'img_dir',
+ ann_dir = 'anno_dir',
+ split = ['split_1.txt', 'split_2.txt'],
+ pipeline=train_pipeline
+ )
+ ```
+ 3. You may concatenate two `ann_dir` and `split` simultaneously.
+
+ ```python
+ dataset_A_train = dict(
+ type='Dataset_A',
+ img_dir = 'img_dir',
+ ann_dir = ['anno_dir_1', 'anno_dir_2'],
+ split = ['split_1.txt', 'split_2.txt'],
+ pipeline=train_pipeline
+ )
+ ```
+ In this case, `anno_dir_1` and `anno_dir_2` correspond to `split_1.txt` and `split_2.txt`, respectively.
+
+2. In case the dataset you want to concatenate is different, you can concatenate the dataset configs like the following.
+
+ ```python
+ dataset_A_train = dict()
+ dataset_B_train = dict()
+
+ data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train = [
+ dataset_A_train,
+ dataset_B_train
+ ],
+ val = dataset_A_val,
+ test = dataset_A_test
+ )
+ ```
+
+
+A more complex example that repeats `Dataset_A` and `Dataset_B` by N and M times, respectively, and then concatenates the repeated datasets is as the following.
+
+```python
+dataset_A_train = dict(
+ type='RepeatDataset',
+ times=N,
+ dataset=dict(
+ type='Dataset_A',
+ ...
+ pipeline=train_pipeline
+ )
+)
+dataset_A_val = dict(
+ ...
+ pipeline=test_pipeline
+)
+dataset_A_test = dict(
+ ...
+ pipeline=test_pipeline
+)
+dataset_B_train = dict(
+ type='RepeatDataset',
+ times=M,
+ dataset=dict(
+ type='Dataset_B',
+ ...
+ pipeline=train_pipeline
+ )
+)
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train = [
+ dataset_A_train,
+ dataset_B_train
+ ],
+ val = dataset_A_val,
+ test = dataset_A_test
+)
+
+```
diff --git a/docs/tutorials/new_modules.md b/docs/tutorials/new_modules.md
new file mode 100644
index 0000000000..5940880907
--- /dev/null
+++ b/docs/tutorials/new_modules.md
@@ -0,0 +1,234 @@
+# 3. Adding New Modules
+
+## Customize optimizer
+
+Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
+You need to first implement the new optimizer in a file, e.g., in `mmseg/core/optimizer/my_optimizer.py`:
+
+```python
+from mmcv.runner import OPTIMIZERS
+from torch.optim import Optimizer
+
+
+@OPTIMIZERS.register_module
+class MyOptimizer(Optimizer):
+
+ def __init__(self, a, b, c)
+
+```
+
+Then add this module in `mmseg/core/optimizer/__init__.py` thus the registry will
+find the new module and add it:
+
+```python
+from .my_optimizer import MyOptimizer
+```
+
+Then you can use `MyOptimizer` in `optimizer` field of config files.
+In the configs, the optimizers are defined by the field `optimizer` like the following:
+
+```python
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+```
+
+To use your own optimizer, the field can be changed as
+
+```python
+optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
+```
+
+We already support all the optimizers implemented by PyTorch, and the only modification needed is to change the `optimizer` field of config files.
+For example, if you want to use `ADAM`, though the performance will drop a lot, the modification could be as the following.
+
+```python
+optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
+```
+
+The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
+
+## Customize optimizer constructor
+
+Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
+The users can do those fine-grained parameter tuning through customizing optimizer constructor.
+
+```
+from mmcv.utils import build_from_cfg
+
+from mmcv.runner import OPTIMIZER_BUILDERS
+from .cocktail_optimizer import CocktailOptimizer
+
+
+@OPTIMIZER_BUILDERS.register_module
+class CocktailOptimizerConstructor(object):
+
+ def __init__(self, optimizer_cfg, paramwise_cfg=None):
+
+ def __call__(self, model):
+
+ return my_optimizer
+
+```
+
+## Develop new components
+
+There are mainly 2 types of components in MMSegmentation.
+
+- backbone: usually stacks of convolutional network to extract feature maps, e.g., ResNet, HRNet.
+- head: the component for semantic segmentation map decoding.
+
+### Add new backbones
+
+Here we show how to develop new components with an example of MobileNet.
+
+1. Create a new file `mmseg/models/backbones/mobilenet.py`.
+
+```python
+import torch.nn as nn
+
+from ..registry import BACKBONES
+
+
+@BACKBONES.register_module
+class MobileNet(nn.Module):
+
+ def __init__(self, arg1, arg2):
+ pass
+
+ def forward(self, x): # should return a tuple
+ pass
+
+ def init_weights(self, pretrained=None):
+ pass
+```
+
+2. Import the module in `mmseg/models/backbones/__init__.py`.
+
+```python
+from .mobilenet import MobileNet
+```
+
+3. Use it in your config file.
+
+```python
+model = dict(
+ ...
+ backbone=dict(
+ type='MobileNet',
+ arg1=xxx,
+ arg2=xxx),
+ ...
+```
+
+### Add new heads
+
+In MMSegmentation, we provide a base [BaseDecodeHead](../../mmseg/models/decode_heads/decode_head.py) for all segmentation heads.
+All newly implemented decode heads should be derived from it.
+Here we show how to develop a new head with the example of [PSPNet](https://arxiv.org/abs/1612.01105) as the following.
+
+First, add a new decode head in `mmseg/models/decode_heads/psp_head.py`.
+PSPNet implements a decode head for segmentation decode.
+To implement a decode head, basically we need to implement three functions of the new module as the following.
+
+```python
+@HEADS.register_module()
+class PSPHead(BaseDecodeHead):
+
+ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
+ super(PSPHead, self).__init__(**kwargs)
+
+ def init_weights(self):
+
+ def forward(self, inputs):
+
+```
+
+Next, the users need to add the module in the `mmseg/models/decode_heads/__init__.py` thus the corresponding registry could find and load them.
+
+The config file of PSPNet is as follows.
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='PSPHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ pool_scales=(1, 2, 3, 6),
+ drop_out_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))
+
+```
+
+### Add new loss
+
+Assume you want to add a new loss as `MyLoss` for segmentation decode.
+To add a new loss function, the users need to implement it in `mmseg/models/losses/my_loss.py`.
+The decorator `weighted_loss` enables the loss to be weighted for each element.
+
+```python
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+from .utils import weighted_loss
+
+@weighted_loss
+def my_loss(pred, target):
+ assert pred.size() == target.size() and target.numel() > 0
+ loss = torch.abs(pred - target)
+ return loss
+
+@LOSSES.register_module
+class MyLoss(nn.Module):
+
+ def __init__(self, reduction='mean', loss_weight=1.0):
+ super(MyLoss, self).__init__()
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None):
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss = self.loss_weight * my_loss(
+ pred, target, weight, reduction=reduction, avg_factor=avg_factor)
+ return loss
+```
+
+Then the users need to add it in the `mmseg/models/losses/__init__.py`.
+
+```python
+from .my_loss import MyLoss, my_loss
+
+```
+
+To use it, modify the `loss_xxx` field.
+Then you need to modify the `loss_decode` field in the head.
+`loss_weight` could be used to balance multiple losses.
+
+```python
+loss_decode=dict(type='MyLoss', loss_weight=1.0))
+```
diff --git a/docs/tutorials/training_tricks.md b/docs/tutorials/training_tricks.md
new file mode 100644
index 0000000000..5ff4b18a70
--- /dev/null
+++ b/docs/tutorials/training_tricks.md
@@ -0,0 +1,28 @@
+# 4. Training Tricks
+
+MMSegmentation supports the following training tricks out of the box.
+
+## Different Learning Rate(LR) for Backbone and Heads
+
+In semantic segmentation, some methods make the LR of heads larger than backbone to achieve better performance or faster convergence.
+
+In MMSegmentation, you may add the following lines to the config to make the LR of heads 10 times that of the backbone.
+```python
+optimizer_config=dict(
+ paramwise_cfg = dict(
+ custom_keys={
+ 'head': dict(lr_mult=10.)}))
+```
+With this modification, the LR of any parameter group with `'head'` in name will be multiplied by 10.
+You may refer to [MMCV doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.DefaultOptimizerConstructor) for further details.
+
+## Online Hard Example Mining (OHEM)
+We implement pixel sampler [here](https://github.com/open-mmlab/mmsegmentation/tree/master/mmseg/core/seg/sampler) for training sampling.
+Here is an example config of training PSPNet with OHEM enabled.
+```python
+_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py'
+model=dict(
+ decode_head=dict(
+ sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000)) )
+```
+In this way, only pixels with confidence score under 0.7 are used to train. And we keep at least 100000 pixels during training.
diff --git a/mmseg/VERSION b/mmseg/VERSION
new file mode 100644
index 0000000000..8f0916f768
--- /dev/null
+++ b/mmseg/VERSION
@@ -0,0 +1 @@
+0.5.0
diff --git a/mmseg/__init__.py b/mmseg/__init__.py
new file mode 100644
index 0000000000..1c4f7e8fcc
--- /dev/null
+++ b/mmseg/__init__.py
@@ -0,0 +1,3 @@
+from .version import __version__, short_version
+
+__all__ = ['__version__', 'short_version']
diff --git a/mmseg/apis/__init__.py b/mmseg/apis/__init__.py
new file mode 100644
index 0000000000..170724be38
--- /dev/null
+++ b/mmseg/apis/__init__.py
@@ -0,0 +1,9 @@
+from .inference import inference_segmentor, init_segmentor, show_result_pyplot
+from .test import multi_gpu_test, single_gpu_test
+from .train import get_root_logger, set_random_seed, train_segmentor
+
+__all__ = [
+ 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor',
+ 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test',
+ 'show_result_pyplot'
+]
diff --git a/mmseg/apis/inference.py b/mmseg/apis/inference.py
new file mode 100644
index 0000000000..3ba6b62ce1
--- /dev/null
+++ b/mmseg/apis/inference.py
@@ -0,0 +1,116 @@
+import matplotlib.pyplot as plt
+import mmcv
+import torch
+from mmcv.parallel import collate, scatter
+from mmcv.runner import load_checkpoint
+
+from mmseg.datasets.pipelines import Compose
+from mmseg.models import build_segmentor
+
+
+def init_segmentor(config, checkpoint=None, device='cuda:0'):
+ """Initialize a segmentor from config file.
+
+ Args:
+ config (str or :obj:`mmcv.Config`): Config file path or the config
+ object.
+ checkpoint (str, optional): Checkpoint path. If left as None, the model
+ will not load any weights.
+
+ Returns:
+ nn.Module: The constructed segmentor.
+ """
+ if isinstance(config, str):
+ config = mmcv.Config.fromfile(config)
+ elif not isinstance(config, mmcv.Config):
+ raise TypeError('config must be a filename or Config object, '
+ 'but got {}'.format(type(config)))
+ config.model.pretrained = None
+ model = build_segmentor(config.model, test_cfg=config.test_cfg)
+ if checkpoint is not None:
+ checkpoint = load_checkpoint(model, checkpoint)
+ model.CLASSES = checkpoint['meta']['CLASSES']
+ model.PALETTE = checkpoint['meta']['PALETTE']
+ model.cfg = config # save the config in the model for convenience
+ model.to(device)
+ model.eval()
+ return model
+
+
+class LoadImage:
+ """A simple pipeline to load image."""
+
+ def __call__(self, results):
+ """Call function to load images into results.
+
+ Args:
+ results (dict): A result dict contains the file name
+ of the image to be read.
+
+ Returns:
+ dict: ``results`` will be returned containing loaded image.
+ """
+
+ if isinstance(results['img'], str):
+ results['filename'] = results['img']
+ results['ori_filename'] = results['img']
+ else:
+ results['filename'] = None
+ results['ori_filename'] = None
+ img = mmcv.imread(results['img'])
+ results['img'] = img
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ return results
+
+
+def inference_segmentor(model, img):
+ """Inference image(s) with the segmentor.
+
+ Args:
+ model (nn.Module): The loaded segmentor.
+ imgs (str/ndarray or list[str/ndarray]): Either image files or loaded
+ images.
+
+ Returns:
+ (list[Tensor]): The segmentation result.
+ """
+ cfg = model.cfg
+ device = next(model.parameters()).device # model device
+ # build the data pipeline
+ test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
+ test_pipeline = Compose(test_pipeline)
+ # prepare data
+ data = dict(img=img)
+ data = test_pipeline(data)
+ data = collate([data], samples_per_gpu=1)
+ if next(model.parameters()).is_cuda:
+ # scatter to specified GPU
+ data = scatter(data, [device])[0]
+ else:
+ data['img_metas'] = data['img_metas'][0].data
+
+ # forward the model
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+ return result
+
+
+def show_result_pyplot(model, img, result, palette=None, fig_size=(15, 10)):
+ """Visualize the segmentation results on the image.
+
+ Args:
+ model (nn.Module): The loaded segmentor.
+ img (str or np.ndarray): Image filename or loaded image.
+ result (list): The segmentation result.
+ palette (list[list[int]]] | None): The palette of segmentation
+ map. If None is given, random palette will be generated.
+ Default: None
+ fig_size (tuple): Figure size of the pyplot figure.
+ """
+ if hasattr(model, 'module'):
+ model = model.module
+ img = model.show_result(img, result, palette=palette, show=False)
+ plt.figure(figsize=fig_size)
+ plt.imshow(mmcv.bgr2rgb(img))
+ plt.show()
diff --git a/mmseg/apis/test.py b/mmseg/apis/test.py
new file mode 100644
index 0000000000..8cbf236f05
--- /dev/null
+++ b/mmseg/apis/test.py
@@ -0,0 +1,191 @@
+import os.path as osp
+import pickle
+import shutil
+import tempfile
+
+import mmcv
+import torch
+import torch.distributed as dist
+from mmcv.image import tensor2imgs
+from mmcv.runner import get_dist_info
+
+
+def single_gpu_test(model, data_loader, show=False, out_dir=None):
+ """Test with single GPU.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+ show (bool): Whether show results during infernece. Default: False.
+ out_dir (str, optional): If specified, the results will be dumped
+ into the directory to save output results.
+
+ Returns:
+ list: The prediction results.
+ """
+
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=not show, **data)
+ if isinstance(results, list):
+ results.extend(result)
+ else:
+ results.append(result)
+
+ if show or out_dir:
+ img_tensor = data['img'][0]
+ img_metas = data['img_metas'][0].data[0]
+ imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
+ assert len(imgs) == len(img_metas)
+
+ for img, img_meta in zip(imgs, img_metas):
+ h, w, _ = img_meta['img_shape']
+ img_show = img[:h, :w, :]
+
+ ori_h, ori_w = img_meta['ori_shape'][:-1]
+ img_show = mmcv.imresize(img_show, (ori_w, ori_h))
+
+ if out_dir:
+ out_file = osp.join(out_dir, img_meta['ori_filename'])
+ else:
+ out_file = None
+
+ model.module.show_result(
+ img_show,
+ result,
+ palette=dataset.PALETTE,
+ show=show,
+ out_file=out_file)
+
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
+ """Test model with multiple gpus.
+
+ This method tests model with multiple gpus and collects the results
+ under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
+ it encodes results to gpu tensors and use gpu communication for results
+ collection. On cpu mode it saves the results on different gpus to 'tmpdir'
+ and collects them by the rank 0 worker.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode.
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
+
+ Returns:
+ list: The prediction results.
+ """
+
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+ if isinstance(results, list):
+ results.extend(result)
+ else:
+ results.append(result)
+
+ if rank == 0:
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size * world_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ if gpu_collect:
+ results = collect_results_gpu(results, len(dataset))
+ else:
+ results = collect_results_cpu(results, len(dataset), tmpdir)
+ return results
+
+
+def collect_results_cpu(result_part, size, tmpdir=None):
+ """Collect results with CPU."""
+ rank, world_size = get_dist_info()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full((MAX_LEN, ),
+ 32,
+ dtype=torch.uint8,
+ device='cuda')
+ if rank == 0:
+ tmpdir = tempfile.mkdtemp()
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+ dir_tensor[:len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+ else:
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
+ part_list.append(mmcv.load(part_file))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def collect_results_gpu(result_part, size):
+ """Collect results with GPU."""
+ rank, world_size = get_dist_info()
+ # dump result part to tensor with pickle
+ part_tensor = torch.tensor(
+ bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
+ # gather all result part tensor shape
+ shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
+ dist.all_gather(shape_list, shape_tensor)
+ # padding result part tensor to max length
+ shape_max = torch.tensor(shape_list).max()
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
+ part_send[:shape_tensor[0]] = part_tensor
+ part_recv_list = [
+ part_tensor.new_zeros(shape_max) for _ in range(world_size)
+ ]
+ # gather all result part
+ dist.all_gather(part_recv_list, part_send)
+
+ if rank == 0:
+ part_list = []
+ for recv, shape in zip(part_recv_list, shape_list):
+ part_list.append(
+ pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ return ordered_results
diff --git a/mmseg/apis/train.py b/mmseg/apis/train.py
new file mode 100644
index 0000000000..b703143587
--- /dev/null
+++ b/mmseg/apis/train.py
@@ -0,0 +1,106 @@
+import random
+
+import numpy as np
+import torch
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import IterBasedRunner, build_optimizer
+
+from mmseg.core import DistEvalHook, EvalHook
+from mmseg.datasets import build_dataloader, build_dataset
+from mmseg.utils import get_root_logger
+
+
+def set_random_seed(seed, deterministic=False):
+ """Set random seed.
+
+ Args:
+ seed (int): Seed to be used.
+ deterministic (bool): Whether to set the deterministic option for
+ CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
+ to True and `torch.backends.cudnn.benchmark` to False.
+ Default: False.
+ """
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+ torch.cuda.manual_seed_all(seed)
+ if deterministic:
+ torch.backends.cudnn.deterministic = True
+ torch.backends.cudnn.benchmark = False
+
+
+def train_segmentor(model,
+ dataset,
+ cfg,
+ distributed=False,
+ validate=False,
+ timestamp=None,
+ meta=None):
+ """Launch segmentor training."""
+ logger = get_root_logger(cfg.log_level)
+
+ # prepare data loaders
+ dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+ data_loaders = [
+ build_dataloader(
+ ds,
+ cfg.data.samples_per_gpu,
+ cfg.data.workers_per_gpu,
+ # cfg.gpus will be ignored if distributed
+ len(cfg.gpu_ids),
+ dist=distributed,
+ seed=cfg.seed,
+ drop_last=True) for ds in dataset
+ ]
+
+ # put model on gpus
+ if distributed:
+ find_unused_parameters = cfg.get('find_unused_parameters', False)
+ # Sets the `find_unused_parameters` parameter in
+ # torch.nn.parallel.DistributedDataParallel
+ model = MMDistributedDataParallel(
+ model.cuda(),
+ device_ids=[torch.cuda.current_device()],
+ broadcast_buffers=False,
+ find_unused_parameters=find_unused_parameters)
+ else:
+ model = MMDataParallel(
+ model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
+
+ # build runner
+ optimizer = build_optimizer(model, cfg.optimizer)
+
+ runner = IterBasedRunner(
+ model=model,
+ batch_processor=None,
+ optimizer=optimizer,
+ work_dir=cfg.work_dir,
+ logger=logger,
+ meta=meta)
+
+ # register hooks
+ runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
+ cfg.checkpoint_config, cfg.log_config,
+ cfg.get('momentum_config', None))
+
+ # an ugly walkaround to make the .log and .log.json filenames the same
+ runner.timestamp = timestamp
+
+ # register eval hooks
+ if validate:
+ val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
+ val_dataloader = build_dataloader(
+ val_dataset,
+ samples_per_gpu=1,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=distributed,
+ shuffle=False)
+ eval_cfg = cfg.get('evaluation', {})
+ eval_hook = DistEvalHook if distributed else EvalHook
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
+
+ if cfg.resume_from:
+ runner.resume(cfg.resume_from)
+ elif cfg.load_from:
+ runner.load_checkpoint(cfg.load_from)
+ runner.run(data_loaders, cfg.workflow, cfg.total_iters)
diff --git a/mmseg/core/__init__.py b/mmseg/core/__init__.py
new file mode 100644
index 0000000000..9656055872
--- /dev/null
+++ b/mmseg/core/__init__.py
@@ -0,0 +1,3 @@
+from .evaluation import * # noqa: F401, F403
+from .seg import * # noqa: F401, F403
+from .utils import * # noqa: F401, F403
diff --git a/mmseg/core/evaluation/__init__.py b/mmseg/core/evaluation/__init__.py
new file mode 100644
index 0000000000..f169d1bf1b
--- /dev/null
+++ b/mmseg/core/evaluation/__init__.py
@@ -0,0 +1,7 @@
+from .class_names import get_classes, get_palette
+from .eval_hooks import DistEvalHook, EvalHook
+from .mean_iou import mean_iou
+
+__all__ = [
+ 'EvalHook', 'DistEvalHook', 'mean_iou', 'get_classes', 'get_palette'
+]
diff --git a/mmseg/core/evaluation/class_names.py b/mmseg/core/evaluation/class_names.py
new file mode 100644
index 0000000000..0d8e66d54b
--- /dev/null
+++ b/mmseg/core/evaluation/class_names.py
@@ -0,0 +1,152 @@
+import mmcv
+
+
def cityscapes_classes():
    """Return the 19 Cityscapes class names as a fresh list."""
    names = [
        'road',
        'sidewalk',
        'building',
        'wall',
        'fence',
        'pole',
        'traffic light',
        'traffic sign',
        'vegetation',
        'terrain',
        'sky',
        'person',
        'rider',
        'car',
        'truck',
        'bus',
        'train',
        'motorcycle',
        'bicycle',
    ]
    return names
+
+
def ade_classes():
    """Return the 150 ADE20K class names as a fresh list.

    The names are kept exactly as listed here (note that ``'bed '`` carries a
    trailing space).
    """
    names = [
        'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ',
        'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth',
        'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car',
        'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug',
        'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe',
        'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column',
        'signboard', 'chest of drawers', 'counter', 'sand', 'sink',
        'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path',
        'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door',
        'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table',
        'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove',
        'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar',
        'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower',
        'chandelier', 'awning', 'streetlight', 'booth', 'television receiver',
        'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister',
        'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van',
        'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything',
        'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent',
        'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank',
        'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake',
        'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce',
        'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen',
        'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass',
        'clock', 'flag'
    ]
    return names
+
+
def voc_classes():
    """Return the 21 Pascal VOC class names (background first) as a list."""
    names = [
        'background',
        'aeroplane',
        'bicycle',
        'bird',
        'boat',
        'bottle',
        'bus',
        'car',
        'cat',
        'chair',
        'cow',
        'diningtable',
        'dog',
        'horse',
        'motorbike',
        'person',
        'pottedplant',
        'sheep',
        'sofa',
        'train',
        'tvmonitor',
    ]
    return names
+
+
def cityscapes_palette():
    """Return the Cityscapes palette: one ``[R, G, B]`` list per class."""
    palette = [
        [128, 64, 128],
        [244, 35, 232],
        [70, 70, 70],
        [102, 102, 156],
        [190, 153, 153],
        [153, 153, 153],
        [250, 170, 30],
        [220, 220, 0],
        [107, 142, 35],
        [152, 251, 152],
        [70, 130, 180],
        [220, 20, 60],
        [255, 0, 0],
        [0, 0, 142],
        [0, 0, 70],
        [0, 60, 100],
        [0, 80, 100],
        [0, 0, 230],
        [119, 11, 32],
    ]
    return palette
+
+
def ade_palette():
    """Return the ADE20K palette: one ``[R, G, B]`` list per class (150)."""
    palette = [
        [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
        [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
        [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
        [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
        [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
        [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
        [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
        [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
        [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
        [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
        [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
        [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
        [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
        [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
        [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
        [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255],
        [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0],
        [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0],
        [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255],
        [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255],
        [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20],
        [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255],
        [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255],
        [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255],
        [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0],
        [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0],
        [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255],
        [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112],
        [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160],
        [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
        [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
        [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
        [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
        [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
        [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
        [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
        [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
        [102, 255, 0], [92, 0, 255]
    ]
    return palette
+
+
def voc_palette():
    """Return the Pascal VOC palette: one ``[R, G, B]`` list per class."""
    palette = [
        [0, 0, 0],
        [128, 0, 0],
        [0, 128, 0],
        [128, 128, 0],
        [0, 0, 128],
        [128, 0, 128],
        [0, 128, 128],
        [128, 128, 128],
        [64, 0, 0],
        [192, 0, 0],
        [64, 128, 0],
        [192, 128, 0],
        [64, 0, 128],
        [192, 0, 128],
        [64, 128, 128],
        [192, 128, 128],
        [0, 64, 0],
        [128, 64, 0],
        [0, 192, 0],
        [128, 192, 0],
        [0, 64, 128],
    ]
    return palette
+
+
# Canonical dataset name -> every alias accepted by ``get_classes`` and
# ``get_palette``. The canonical name is also the prefix of the matching
# ``<name>_classes`` / ``<name>_palette`` functions in this module.
dataset_aliases = dict(
    cityscapes=['cityscapes'],
    ade=['ade', 'ade20k'],
    voc=['voc', 'pascal_voc', 'voc12', 'voc12aug'],
)
+
+
def get_classes(dataset):
    """Get class names of a dataset.

    Args:
        dataset (str): Dataset name or one of its aliases listed in
            ``dataset_aliases``.

    Returns:
        list[str]: The class names produced by the matching
            ``<name>_classes`` function of this module.

    Raises:
        TypeError: If ``dataset`` is not a str.
        ValueError: If ``dataset`` is not a known name or alias.
    """
    if not mmcv.is_str(dataset):
        raise TypeError(f'dataset must be a str, but got {type(dataset)}')

    # Invert ``dataset_aliases`` so every alias maps to its canonical name.
    alias2name = {
        alias: name
        for name, aliases in dataset_aliases.items() for alias in aliases
    }
    if dataset not in alias2name:
        raise ValueError(f'Unrecognized dataset: {dataset}')

    # Look the provider up by name instead of eval()-ing a built string.
    return globals()[alias2name[dataset] + '_classes']()
+
+
def get_palette(dataset):
    """Get class palette (RGB) of a dataset.

    Args:
        dataset (str): Dataset name or one of its aliases listed in
            ``dataset_aliases``.

    Returns:
        list[list[int]]: The palette produced by the matching
            ``<name>_palette`` function of this module.

    Raises:
        TypeError: If ``dataset`` is not a str.
        ValueError: If ``dataset`` is not a known name or alias.
    """
    if not mmcv.is_str(dataset):
        raise TypeError(f'dataset must be a str, but got {type(dataset)}')

    # Invert ``dataset_aliases`` so every alias maps to its canonical name.
    alias2name = {
        alias: name
        for name, aliases in dataset_aliases.items() for alias in aliases
    }
    if dataset not in alias2name:
        raise ValueError(f'Unrecognized dataset: {dataset}')

    # Look the provider up by name instead of eval()-ing a built string.
    return globals()[alias2name[dataset] + '_palette']()
diff --git a/mmseg/core/evaluation/eval_hooks.py b/mmseg/core/evaluation/eval_hooks.py
new file mode 100644
index 0000000000..cbd0b23fe9
--- /dev/null
+++ b/mmseg/core/evaluation/eval_hooks.py
@@ -0,0 +1,80 @@
+import os.path as osp
+
+from mmcv.runner import Hook
+from torch.utils.data import DataLoader
+
+
class EvalHook(Hook):
    """Hook that evaluates the model on a dataloader during training.

    Attributes:
        dataloader (DataLoader): A PyTorch dataloader.
        interval (int): Evaluation interval, checked with ``every_n_iters``
            (i.e. counted in iterations). Default: 1.
        eval_kwargs (dict): Extra keyword arguments forwarded to the
            dataset's ``evaluate`` method.
    """

    def __init__(self, dataloader, interval=1, **eval_kwargs):
        if not isinstance(dataloader, DataLoader):
            message = ('dataloader must be a pytorch DataLoader, but got '
                       f'{type(dataloader)}')
            raise TypeError(message)
        self.eval_kwargs = eval_kwargs
        self.interval = interval
        self.dataloader = dataloader

    def after_train_iter(self, runner):
        """Run single-GPU evaluation every ``interval`` train iterations."""
        if self.every_n_iters(runner, self.interval):
            # Imported lazily, as in the original code.
            from mmseg.apis import single_gpu_test
            runner.log_buffer.clear()
            outputs = single_gpu_test(
                runner.model, self.dataloader, show=False)
            self.evaluate(runner, outputs)

    def evaluate(self, runner, results):
        """Evaluate ``results`` with the dataset and log the metrics."""
        dataset = self.dataloader.dataset
        metrics = dataset.evaluate(
            results, logger=runner.logger, **self.eval_kwargs)
        log_output = runner.log_buffer.output
        for key in metrics:
            log_output[key] = metrics[key]
        # Mark the buffer ready so the metrics get picked up for logging.
        runner.log_buffer.ready = True
+
+
class DistEvalHook(EvalHook):
    """Distributed evaluation hook.

    Attributes:
        dataloader (DataLoader): A PyTorch dataloader.
        interval (int): Evaluation interval, checked with ``every_n_iters``
            (i.e. counted in iterations). Default: 1.
        gpu_collect (bool): Whether to use gpu or cpu to collect results.
            Default: False.

    Note:
        The temporary directory handed to ``multi_gpu_test`` is not
        configurable; it is always ``<runner.work_dir>/.eval_hook``.
    """

    def __init__(self,
                 dataloader,
                 interval=1,
                 gpu_collect=False,
                 **eval_kwargs):
        # Reuse the parent's validation and attribute setup instead of
        # duplicating it here.
        super(DistEvalHook, self).__init__(
            dataloader, interval=interval, **eval_kwargs)
        self.gpu_collect = gpu_collect

    def after_train_iter(self, runner):
        """Run multi-GPU evaluation every ``interval`` train iterations."""
        if not self.every_n_iters(runner, self.interval):
            return
        # Imported lazily, as in the original code.
        from mmseg.apis import multi_gpu_test
        runner.log_buffer.clear()
        results = multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=osp.join(runner.work_dir, '.eval_hook'),
            gpu_collect=self.gpu_collect)
        # Only rank 0 evaluates and logs the collected results.
        if runner.rank == 0:
            print('\n')
            self.evaluate(runner, results)
diff --git a/mmseg/core/evaluation/mean_iou.py b/mmseg/core/evaluation/mean_iou.py
new file mode 100644
index 0000000000..f0b4234fb4
--- /dev/null
+++ b/mmseg/core/evaluation/mean_iou.py
@@ -0,0 +1,70 @@
+import numpy as np
+
+
def intersect_and_union(pred_label, label, num_classes, ignore_index):
    """Calculate intersection and Union.

    Args:
        pred_label (ndarray): Prediction segmentation map
        label (ndarray): Ground truth segmentation map
        num_classes (int): Number of categories
        ignore_index (int): Index that will be ignored in evaluation.

    Returns:
        ndarray: The intersection of prediction and ground truth histogram
            on all classes
        ndarray: The union of prediction and ground truth histogram on all
            classes
        ndarray: The prediction histogram on all classes.
        ndarray: The ground truth histogram on all classes.
    """

    # Drop every pixel whose ground truth is the ignore index.
    mask = (label != ignore_index)
    pred_label = pred_label[mask]
    label = label[mask]

    # One histogram bin per class id in [0, num_classes).
    bins = np.arange(num_classes + 1)
    intersect = pred_label[pred_label == label]
    area_intersect, _ = np.histogram(intersect, bins=bins)
    area_pred_label, _ = np.histogram(pred_label, bins=bins)
    area_label, _ = np.histogram(label, bins=bins)
    area_union = area_pred_label + area_label - area_intersect

    return area_intersect, area_union, area_pred_label, area_label


def mean_iou(results, gt_seg_maps, num_classes, ignore_index):
    """Calculate overall accuracy, per-class accuracy and per-class IoU.

    Args:
        results (list[ndarray]): List of prediction segmentation maps
        gt_seg_maps (list[ndarray]): list of ground truth segmentation maps
        num_classes (int): Number of categories
        ignore_index (int): Index that will be ignored in evaluation.

    Returns:
        float: Overall accuracy on all images.
        ndarray: Per category accuracy, shape (num_classes, )
        ndarray: Per category IoU, shape (num_classes, )
    """

    num_imgs = len(results)
    assert len(gt_seg_maps) == num_imgs
    # ``np.float`` was only an alias of the builtin float and no longer
    # exists in recent NumPy releases; ``np.float64`` is the same dtype.
    total_area_intersect = np.zeros((num_classes, ), dtype=np.float64)
    total_area_union = np.zeros((num_classes, ), dtype=np.float64)
    total_area_pred_label = np.zeros((num_classes, ), dtype=np.float64)
    total_area_label = np.zeros((num_classes, ), dtype=np.float64)
    for pred, gt_seg_map in zip(results, gt_seg_maps):
        area_intersect, area_union, area_pred_label, area_label = \
            intersect_and_union(pred, gt_seg_map, num_classes,
                                ignore_index=ignore_index)
        total_area_intersect += area_intersect
        total_area_union += area_union
        total_area_pred_label += area_pred_label
        total_area_label += area_label
    # Classes with no ground-truth/union pixels divide by zero here and come
    # out as nan, exactly as before.
    all_acc = total_area_intersect.sum() / total_area_label.sum()
    acc = total_area_intersect / total_area_label
    iou = total_area_intersect / total_area_union

    return all_acc, acc, iou
diff --git a/mmseg/core/seg/__init__.py b/mmseg/core/seg/__init__.py
new file mode 100644
index 0000000000..93bc129b68
--- /dev/null
+++ b/mmseg/core/seg/__init__.py
@@ -0,0 +1,4 @@
+from .builder import build_pixel_sampler
+from .sampler import BasePixelSampler, OHEMPixelSampler
+
+__all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler']
diff --git a/mmseg/core/seg/builder.py b/mmseg/core/seg/builder.py
new file mode 100644
index 0000000000..f5a117ce7b
--- /dev/null
+++ b/mmseg/core/seg/builder.py
@@ -0,0 +1,8 @@
+from mmcv.utils import Registry, build_from_cfg
+
# Registry that pixel sampler classes add themselves to via
# ``@PIXEL_SAMPLERS.register_module()``.
PIXEL_SAMPLERS = Registry('pixel sampler')


def build_pixel_sampler(cfg, **default_args):
    """Build a pixel sampler for segmentation maps from ``cfg``.

    Args:
        cfg (dict): Config passed to ``build_from_cfg`` together with the
            ``PIXEL_SAMPLERS`` registry.
        **default_args: Collected into a dict and passed on as the default
            arguments of ``build_from_cfg``.

    Returns:
        The object built by ``build_from_cfg``.
    """
    sampler = build_from_cfg(cfg, PIXEL_SAMPLERS, default_args)
    return sampler
diff --git a/mmseg/core/seg/sampler/__init__.py b/mmseg/core/seg/sampler/__init__.py
new file mode 100644
index 0000000000..332b242c03
--- /dev/null
+++ b/mmseg/core/seg/sampler/__init__.py
@@ -0,0 +1,4 @@
+from .base_pixel_sampler import BasePixelSampler
+from .ohem_pixel_sampler import OHEMPixelSampler
+
+__all__ = ['BasePixelSampler', 'OHEMPixelSampler']
diff --git a/mmseg/core/seg/sampler/base_pixel_sampler.py b/mmseg/core/seg/sampler/base_pixel_sampler.py
new file mode 100644
index 0000000000..db322d199f
--- /dev/null
+++ b/mmseg/core/seg/sampler/base_pixel_sampler.py
@@ -0,0 +1,13 @@
+from abc import ABCMeta, abstractmethod
+
+
class BasePixelSampler(metaclass=ABCMeta):
    """Abstract base class of pixel samplers.

    Subclasses must implement :meth:`sample`.
    """

    def __init__(self, **kwargs):
        """Accept arbitrary keyword arguments and ignore them."""

    @abstractmethod
    def sample(self, seg_logit, seg_label):
        """Placeholder for the sample function implemented by subclasses."""
diff --git a/mmseg/core/seg/sampler/ohem_pixel_sampler.py b/mmseg/core/seg/sampler/ohem_pixel_sampler.py
new file mode 100644
index 0000000000..28c14ab5d1
--- /dev/null
+++ b/mmseg/core/seg/sampler/ohem_pixel_sampler.py
@@ -0,0 +1,64 @@
+import torch
+import torch.nn.functional as F
+
+from ..builder import PIXEL_SAMPLERS
+from .base_pixel_sampler import BasePixelSampler
+
+
@PIXEL_SAMPLERS.register_module()
class OHEMPixelSampler(BasePixelSampler):
    """Online Hard Example Mining Sampler for segmentation.

    Args:
        thresh (float): The threshold for hard example selection. Below
            which, are prediction with low confidence. Default: 0.7.
        min_kept (int): The minimum number of predictions to keep.
            Default: 100000.
        ignore_index (int): The ignore index for training. Default: 255.
    """

    def __init__(self, thresh=0.7, min_kept=100000, ignore_index=255):
        super(OHEMPixelSampler, self).__init__()
        assert min_kept > 1
        self.thresh = thresh
        self.min_kept = min_kept
        self.ignore_index = ignore_index

    def sample(self, seg_logit, seg_label):
        """Select hard (low-confidence) pixels.

        A valid pixel is selected when the softmax probability of its
        ground-truth class is below the effective threshold. That threshold
        is the larger of ``thresh`` and the ``min_kept * N``-th smallest such
        probability. Selected pixels get weight 1, all other pixels
        (including ignored ones) get weight 0.

        Args:
            seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W)
            seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W)

        Returns:
            torch.Tensor: segmentation weight, shape (N, H, W)
        """
        with torch.no_grad():
            assert seg_logit.shape[2:] == seg_label.shape[2:]
            assert seg_label.shape[1] == 1
            seg_label = seg_label.squeeze(1).long()
            batch_kept = self.min_kept * seg_label.size(0)
            valid_mask = seg_label != self.ignore_index

            # Probability assigned to the ground-truth class of each pixel.
            # Ignored pixels are pointed at class 0 so ``gather`` stays in
            # range; they are filtered out by ``valid_mask`` below.
            seg_prob = F.softmax(seg_logit, dim=1)
            tmp_seg_label = seg_label.clone().unsqueeze(1)
            tmp_seg_label[tmp_seg_label == self.ignore_index] = 0
            seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1)
            valid_prob = seg_prob[valid_mask]

            sort_prob, _ = valid_prob.sort()
            if sort_prob.numel() > 0:
                min_threshold = sort_prob[min(batch_kept,
                                              sort_prob.numel() - 1)]
            else:
                min_threshold = 0.0
            threshold = max(min_threshold, self.thresh)

            # Write through a single indexed assignment. Chaining two
            # boolean indexings (``w[mask][cond] = v``) only modifies a
            # temporary copy, and the comparison must use probabilities in
            # pixel order, not in sorted order.
            seg_weight = seg_logit.new_zeros(size=seg_label.size())
            seg_weight[valid_mask] = (valid_prob < threshold).to(
                seg_weight.dtype)

            return seg_weight
diff --git a/mmseg/core/utils/__init__.py b/mmseg/core/utils/__init__.py
new file mode 100644
index 0000000000..f2678b321c
--- /dev/null
+++ b/mmseg/core/utils/__init__.py
@@ -0,0 +1,3 @@
+from .misc import add_prefix
+
+__all__ = ['add_prefix']
diff --git a/mmseg/core/utils/misc.py b/mmseg/core/utils/misc.py
new file mode 100644
index 0000000000..eb862a82bd
--- /dev/null
+++ b/mmseg/core/utils/misc.py
@@ -0,0 +1,17 @@
def add_prefix(inputs, prefix):
    """Return a copy of ``inputs`` whose keys are prefixed.

    Args:
        inputs (dict): The input dict with str keys.
        prefix (str): The prefix to add.

    Returns:
        dict: A new dict in which each key ``name`` becomes
            ``'{prefix}.{name}'``; the values are unchanged.
    """
    return {f'{prefix}.{key}': inputs[key] for key in inputs}
diff --git a/mmseg/datasets/__init__.py b/mmseg/datasets/__init__.py
new file mode 100644
index 0000000000..cb81b9a2eb
--- /dev/null
+++ b/mmseg/datasets/__init__.py
@@ -0,0 +1,12 @@
+from .ade import ADE20KDataset
+from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
+from .cityscapes import CityscapesDataset
+from .custom import CustomDataset
+from .dataset_wrappers import ConcatDataset, RepeatDataset
+from .voc import PascalVOCDataset
+
+__all__ = [
+ 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
+ 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
+ 'PascalVOCDataset', 'ADE20KDataset'
+]
diff --git a/mmseg/datasets/ade.py b/mmseg/datasets/ade.py
new file mode 100644
index 0000000000..5913e43775
--- /dev/null
+++ b/mmseg/datasets/ade.py
@@ -0,0 +1,84 @@
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
@DATASETS.register_module()
class ADE20KDataset(CustomDataset):
    """ADE20K dataset.

    In segmentation map annotation for ADE20K, 0 stands for background, which
    is not included in 150 categories. ``reduce_zero_label`` is fixed to True.
    The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to
    '.png'.
    """

    # Class names; ``PALETTE`` below has one RGB entry per class.
    CLASSES = (
        'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ',
        'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth',
        'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car',
        'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug',
        'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe',
        'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column',
        'signboard', 'chest of drawers', 'counter', 'sand', 'sink',
        'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path',
        'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door',
        'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table',
        'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove',
        'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar',
        'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower',
        'chandelier', 'awning', 'streetlight', 'booth', 'television receiver',
        'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister',
        'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van',
        'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything',
        'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent',
        'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank',
        'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake',
        'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce',
        'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen',
        'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass',
        'clock', 'flag',
    )

    PALETTE = [
        [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
        [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
        [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
        [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
        [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
        [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
        [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
        [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
        [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
        [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
        [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
        [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
        [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
        [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
        [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
        [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255],
        [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0],
        [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0],
        [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255],
        [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255],
        [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20],
        [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255],
        [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255],
        [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255],
        [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0],
        [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0],
        [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255],
        [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112],
        [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160],
        [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
        [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
        [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
        [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
        [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
        [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
        [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
        [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
        [102, 255, 0], [92, 0, 255],
    ]

    def __init__(self, **kwargs):
        # The three fixed options are passed explicitly, so supplying any of
        # them again through ``kwargs`` raises a TypeError.
        fixed_options = dict(
            img_suffix='.jpg', seg_map_suffix='.png', reduce_zero_label=True)
        super().__init__(**fixed_options, **kwargs)
diff --git a/mmseg/datasets/builder.py b/mmseg/datasets/builder.py
new file mode 100644
index 0000000000..f7a9926111
--- /dev/null
+++ b/mmseg/datasets/builder.py
@@ -0,0 +1,169 @@
+import copy
+import platform
+import random
+from functools import partial
+
+import numpy as np
+from mmcv.parallel import collate
+from mmcv.runner import get_dist_info
+from mmcv.utils import Registry, build_from_cfg
+from mmcv.utils.parrots_wrapper import DataLoader, PoolDataLoader
+from torch.utils.data import DistributedSampler
+
if platform.system() != 'Windows':
    # https://github.com/pytorch/pytorch/issues/973
    # Raise the soft limit on open file descriptors to at least 4096 (capped
    # by the hard limit). A soft limit that is already higher is kept as is
    # rather than being lowered to 4096.
    import resource
    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    base_soft_limit = rlimit[0]
    hard_limit = rlimit[1]
    soft_limit = min(max(4096, base_soft_limit), hard_limit)
    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
+
# Registry that dataset classes add themselves to via
# ``@DATASETS.register_module()``; ``build_dataset`` resolves configs
# against it with ``build_from_cfg``.
DATASETS = Registry('dataset')
# Registry named 'pipeline'; presumably filled by the data pipeline
# transforms defined elsewhere in the package -- confirm against
# ``.pipelines``.
PIPELINES = Registry('pipeline')
+
+
def _concat_dataset(cfg, default_args=None):
    """Build a :obj:`ConcatDataset` from one config with list-valued fields.

    ``img_dir``, ``ann_dir`` and ``split`` may each be a single value or a
    list/tuple. One sub-dataset is built per index; list-valued fields
    contribute their i-th element and scalar fields are shared by all.

    Args:
        cfg (dict): Dataset config; must contain ``img_dir``.
        default_args (dict | None): Forwarded to ``build_dataset``.

    Returns:
        ConcatDataset: Concatenation of the built sub-datasets.
    """
    from .dataset_wrappers import ConcatDataset

    def _is_seq(value):
        return isinstance(value, (list, tuple))

    def _length(value):
        # A scalar entry counts as a single item.
        return len(value) if _is_seq(value) else 1

    img_dir = cfg['img_dir']
    ann_dir = cfg.get('ann_dir', None)
    split = cfg.get('split', None)

    num_img_dir = _length(img_dir)
    num_ann_dir = 0 if ann_dir is None else _length(ann_dir)
    num_split = 0 if split is None else _length(split)

    # Sanity-check that the list lengths are compatible with each other.
    if num_img_dir > 1:
        assert num_img_dir == num_ann_dir or num_ann_dir == 0
        assert num_img_dir == num_split or num_split == 0
    else:
        assert num_split == num_ann_dir or num_ann_dir <= 1
    num_dset = max(num_split, num_img_dir)

    datasets = []
    for idx in range(num_dset):
        sub_cfg = copy.deepcopy(cfg)
        for key, value in (('img_dir', img_dir), ('ann_dir', ann_dir),
                           ('split', split)):
            if _is_seq(value):
                sub_cfg[key] = value[idx]
        datasets.append(build_dataset(sub_cfg, default_args))

    return ConcatDataset(datasets)
+
+
def build_dataset(cfg, default_args=None):
    """Build a dataset (or a wrapper around datasets) from ``cfg``.

    Dispatch order:

    1. A list/tuple of configs becomes a ``ConcatDataset`` of the datasets
       built from each element.
    2. ``type == 'RepeatDataset'`` wraps the dataset built from
       ``cfg['dataset']`` and repeats it ``cfg['times']`` times.
    3. A list/tuple ``img_dir`` or ``split`` is expanded by
       ``_concat_dataset``.
    4. Anything else is built from the ``DATASETS`` registry.

    Args:
        cfg (dict | list | tuple): Dataset config(s).
        default_args (dict | None): Default arguments for building.

    Returns:
        The built dataset object.
    """
    from .dataset_wrappers import ConcatDataset, RepeatDataset

    if isinstance(cfg, (list, tuple)):
        return ConcatDataset(
            [build_dataset(item, default_args) for item in cfg])

    if cfg['type'] == 'RepeatDataset':
        wrapped = build_dataset(cfg['dataset'], default_args)
        return RepeatDataset(wrapped, cfg['times'])

    seq_types = (list, tuple)
    if isinstance(cfg.get('img_dir'), seq_types) or isinstance(
            cfg.get('split', None), seq_types):
        return _concat_dataset(cfg, default_args)

    return build_from_cfg(cfg, DATASETS, default_args)
+
+
def build_dataloader(dataset,
                     samples_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
                     seed=None,
                     drop_last=False,
                     pin_memory=True,
                     dataloader_type='PoolDataLoader',
                     **kwargs):
    """Build PyTorch DataLoader.

    In distributed training, each GPU/process has a dataloader.
    In non-distributed training, there is only one dataloader for all GPUs.

    Args:
        dataset (Dataset): A PyTorch dataset.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Default: True.
        shuffle (bool): Whether to shuffle the data at every epoch.
            Default: True.
        seed (int | None): Seed to be used. Default: None.
        drop_last (bool): Whether to drop the last incomplete batch in epoch.
            Default: False
        pin_memory (bool): Whether to use pin_memory in DataLoader.
            Default: True
        dataloader_type (str): Type of dataloader. Default: 'PoolDataLoader'
        kwargs: any keyword argument to be used to initialize DataLoader

    Returns:
        DataLoader: A PyTorch dataloader.
    """
    rank, world_size = get_dist_info()

    if not dist:
        # A single loader feeds all GPUs, so scale batch size and workers.
        sampler = None
        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu
    else:
        # Shuffling is delegated to the sampler; the loader must not shuffle.
        sampler = DistributedSampler(
            dataset, world_size, rank, shuffle=shuffle)
        shuffle = False
        batch_size = samples_per_gpu
        num_workers = workers_per_gpu

    # Workers are only seeded when an explicit seed is given.
    init_fn = None
    if seed is not None:
        init_fn = partial(
            worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)

    assert dataloader_type in (
        'DataLoader',
        'PoolDataLoader'), f'unsupported dataloader {dataloader_type}'

    if dataloader_type == 'PoolDataLoader':
        loader_cls = PoolDataLoader
    else:
        loader_cls = DataLoader

    loader_options = dict(
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
        pin_memory=pin_memory,
        shuffle=shuffle,
        worker_init_fn=init_fn,
        drop_last=drop_last)
    return loader_cls(dataset, **loader_options, **kwargs)
+
+
def worker_init_fn(worker_id, num_workers, rank, seed):
    """Seed NumPy's and Python's RNGs for one dataloader worker.

    The seed of each worker is ``num_workers * rank + worker_id + seed``.

    Args:
        worker_id (int): Worker id.
        num_workers (int): Number of workers.
        rank (int): The rank of current process.
        seed (int): The random seed to use.
    """
    rank_offset = num_workers * rank
    worker_seed = rank_offset + worker_id + seed
    for seed_rng in (np.random.seed, random.seed):
        seed_rng(worker_seed)
diff --git a/mmseg/datasets/cityscapes.py b/mmseg/datasets/cityscapes.py
new file mode 100644
index 0000000000..9a12ab1724
--- /dev/null
+++ b/mmseg/datasets/cityscapes.py
@@ -0,0 +1,213 @@
+import os.path as osp
+import tempfile
+
+import mmcv
+import numpy as np
+from mmcv.utils import print_log
+from PIL import Image
+
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
@DATASETS.register_module()
class CityscapesDataset(CustomDataset):
    """Cityscapes dataset.

    The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix`` is
    fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset.
    """

    CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
               'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
               'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
               'bicycle')

    PALETTE = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
               [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0],
               [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60],
               [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100],
               [0, 80, 100], [0, 0, 230], [119, 11, 32]]

    def __init__(self, **kwargs):
        super(CityscapesDataset, self).__init__(
            img_suffix='_leftImg8bit.png',
            seg_map_suffix='_gtFine_labelTrainIds.png',
            **kwargs)

    @staticmethod
    def _convert_to_label_id(result):
        """Convert trainId to id for cityscapes.

        Args:
            result (ndarray): Segmentation map holding trainIds.

        Returns:
            ndarray: A copy of ``result`` in which every trainId is replaced
                by the label id given by cityscapesscripts.
        """
        import cityscapesscripts.helpers.labels as CSLabels
        result_copy = result.copy()
        # Masks are computed on the untouched input so that ids written
        # earlier are never remapped a second time.
        for trainId, label in CSLabels.trainId2label.items():
            result_copy[result == trainId] = label.id

        return result_copy

    def results2img(self, results, imgfile_prefix, to_label_id):
        """Write the segmentation results to images.

        Args:
            results (list[list | tuple | ndarray]): Testing results of the
                dataset.
            imgfile_prefix (str): Directory the png files are written to. It
                is created if it does not exist. Each file is named after
                the basename of the corresponding input image, i.e.
                "imgfile_prefix/<basename>.png".
            to_label_id (bool): whether convert output to label_id for
                submission

        Returns:
            list[str]: Paths of the written semantic segmentation images.
        """
        import cityscapesscripts.helpers.labels as CSLabels

        # Saving fails if the target directory is missing.
        mmcv.mkdir_or_exist(imgfile_prefix)

        # The palette is the same for every image, so build it only once.
        palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8)
        for label_id, label in CSLabels.id2label.items():
            palette[label_id] = label.color

        result_files = []
        prog_bar = mmcv.ProgressBar(len(self))
        for idx in range(len(self)):
            result = results[idx]
            if to_label_id:
                result = self._convert_to_label_id(result)
            filename = self.img_infos[idx]['filename']
            basename = osp.splitext(osp.basename(filename))[0]

            png_filename = osp.join(imgfile_prefix, f'{basename}.png')

            output = Image.fromarray(result.astype(np.uint8)).convert('P')
            output.putpalette(palette)
            output.save(png_filename)
            result_files.append(png_filename)
            prog_bar.update()

        return result_files

    def format_results(self, results, imgfile_prefix=None, to_label_id=True):
        """Format the results into dir (standard format for Cityscapes
        evaluation).

        Args:
            results (list): Testing results of the dataset.
            imgfile_prefix (str | None): Directory the result images are
                written to. If not specified, a temporary directory will be
                created. Default: None.
            to_label_id (bool): whether convert output to label_id for
                submission. Default: True

        Returns:
            tuple: (result_files, tmp_dir), result_files is a list containing
                the image paths, tmp_dir is the temporary directory object
                created for saving the png files when imgfile_prefix is not
                specified, otherwise None. The caller is responsible for
                cleaning it up.
        """

        assert isinstance(results, list), 'results must be a list'
        assert len(results) == len(self), (
            'The length of results is not equal to the dataset len: '
            f'{len(results)} != {len(self)}')

        if imgfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
            imgfile_prefix = tmp_dir.name
        else:
            tmp_dir = None
        result_files = self.results2img(results, imgfile_prefix, to_label_id)

        return result_files, tmp_dir

    def evaluate(self,
                 results,
                 metric='mIoU',
                 logger=None,
                 imgfile_prefix=None):
        """Evaluation in Cityscapes/default protocol.

        Args:
            results (list): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated. The metric
                'cityscapes' selects the Cityscapes protocol; all remaining
                metrics are passed to the parent class.
            logger (logging.Logger | None | str): Logger used for printing
                related information during evaluation. Default: None.
            imgfile_prefix (str | None): Directory of the output png files,
                for cityscapes evaluation only. If not specified, a
                temporary directory will be created.
                Default: None.

        Returns:
            dict[str, float]: Cityscapes/default metrics.
        """

        eval_results = dict()
        # Work on a copy so the caller's list is not modified.
        metrics = metric.copy() if isinstance(metric, list) else [metric]
        if 'cityscapes' in metrics:
            eval_results.update(
                self._evaluate_cityscapes(results, logger, imgfile_prefix))
            metrics.remove('cityscapes')
        if len(metrics) > 0:
            eval_results.update(
                super(CityscapesDataset,
                      self).evaluate(results, metrics, logger))

        return eval_results

    def _evaluate_cityscapes(self, results, logger, imgfile_prefix):
        """Evaluation in Cityscapes protocol.

        Args:
            results (list): Testing results of the dataset.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            imgfile_prefix (str | None): Directory of the output png files.

        Returns:
            dict[str, float]: Cityscapes evaluation results.

        Raises:
            ImportError: If cityscapesscripts is not installed.
        """
        try:
            import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval  # noqa
        except ImportError:
            raise ImportError('Please run "pip install cityscapesscripts" to '
                              'install cityscapesscripts first.')
        msg = 'Evaluating in Cityscapes style'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        _, tmp_dir = self.format_results(results, imgfile_prefix)
        result_dir = imgfile_prefix if tmp_dir is None else tmp_dir.name

        eval_results = dict()
        try:
            print_log(
                f'Evaluating results under {result_dir} ...', logger=logger)

            CSEval.args.evalInstLevelScore = True
            CSEval.args.predictionPath = osp.abspath(result_dir)
            CSEval.args.evalPixelAccuracy = True
            CSEval.args.JSONOutput = False

            seg_map_list = []
            pred_list = []

            # when evaluating with official cityscapesscripts,
            # **_gtFine_labelIds.png is used
            for seg_map in mmcv.scandir(
                    self.ann_dir, 'gtFine_labelIds.png', recursive=True):
                seg_map_list.append(osp.join(self.ann_dir, seg_map))
                pred_list.append(CSEval.getPrediction(CSEval.args, seg_map))

            eval_results.update(
                CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args))
        finally:
            # Remove the temporary results even when evaluation fails.
            if tmp_dir is not None:
                tmp_dir.cleanup()

        return eval_results
diff --git a/mmseg/datasets/custom.py b/mmseg/datasets/custom.py
new file mode 100644
index 0000000000..92d17c5252
--- /dev/null
+++ b/mmseg/datasets/custom.py
@@ -0,0 +1,291 @@
+import os.path as osp
+from functools import reduce
+
+import mmcv
+import numpy as np
+from mmcv.utils import print_log
+from torch.utils.data import Dataset
+
+from mmseg.core import mean_iou
+from mmseg.utils import get_root_logger
+from .builder import DATASETS
+from .pipelines import Compose
+
+
+@DATASETS.register_module()
+class CustomDataset(Dataset):
+ """Custom dataset for semantic segmentation.
+
+ An example of file structure is as follows.
+
+ .. code-block:: none
+
+ ├── data
+ │ ├── my_dataset
+ │ │ ├── img_dir
+ │ │ │ ├── train
+ │ │ │ │ ├── xxx{img_suffix}
+ │ │ │ │ ├── yyy{img_suffix}
+ │ │ │ │ ├── zzz{img_suffix}
+ │ │ │ ├── val
+ │ │ ├── ann_dir
+ │ │ │ ├── train
+ │ │ │ │ ├── xxx{seg_map_suffix}
+ │ │ │ │ ├── yyy{seg_map_suffix}
+ │ │ │ │ ├── zzz{seg_map_suffix}
+ │ │ │ ├── val
+
+ The img/gt_semantic_seg pair of CustomDataset should share the same name
+ except for the suffix. A valid img/gt_semantic_seg filename pair looks like
+ ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included
+ in the suffix). If split is given, then ``xxx`` is listed in the txt file.
+ Otherwise, all files in ``img_dir/`` and ``ann_dir`` will be loaded.
+ Please refer to ``docs/tutorials/new_dataset.md`` for more details.
+
+
+ Args:
+ pipeline (list[dict]): Processing pipeline
+ img_dir (str): Path to image directory
+ img_suffix (str): Suffix of images. Default: '.jpg'
+ ann_dir (str, optional): Path to annotation directory. Default: None
+ seg_map_suffix (str): Suffix of segmentation maps. Default: '.png'
+ split (str, optional): Split txt file. If split is specified, only
+ the files listed in the split file will be loaded. Otherwise, all
+ images in img_dir/ann_dir will be loaded. Default: None
+ data_root (str, optional): Data root for img_dir/ann_dir. Default:
+ None.
+ test_mode (bool): If test_mode=True, gt wouldn't be loaded.
+ ignore_index (int): The label index to be ignored. Default: 255
+ reduce_zero_label (bool): Whether to mark label zero as ignored.
+ Default: False
+ """
+
+ CLASSES = None
+
+ PALETTE = None
+
+ def __init__(self,
+ pipeline,
+ img_dir,
+ img_suffix='.jpg',
+ ann_dir=None,
+ seg_map_suffix='.png',
+ split=None,
+ data_root=None,
+ test_mode=False,
+ ignore_index=255,
+ reduce_zero_label=False):
+ self.pipeline = Compose(pipeline)
+ self.img_dir = img_dir
+ self.img_suffix = img_suffix
+ self.ann_dir = ann_dir
+ self.seg_map_suffix = seg_map_suffix
+ self.split = split
+ self.data_root = data_root
+ self.test_mode = test_mode
+ self.ignore_index = ignore_index
+ self.reduce_zero_label = reduce_zero_label
+
+ # join paths if data_root is specified
+ if self.data_root is not None:
+ if not osp.isabs(self.img_dir):
+ self.img_dir = osp.join(self.data_root, self.img_dir)
+ if not (self.ann_dir is None or osp.isabs(self.ann_dir)):
+ self.ann_dir = osp.join(self.data_root, self.ann_dir)
+ if not (self.split is None or osp.isabs(self.split)):
+ self.split = osp.join(self.data_root, self.split)
+
+ # load annotations
+ self.img_infos = self.load_annotations(self.img_dir, self.img_suffix,
+ self.ann_dir,
+ self.seg_map_suffix, self.split)
+
+ def __len__(self):
+ """Total number of samples of data."""
+ return len(self.img_infos)
+
+ def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix,
+ split):
+ """Load annotation from directory.
+
+ Args:
+ img_dir (str): Path to image directory
+ img_suffix (str): Suffix of images.
+ ann_dir (str|None): Path to annotation directory.
+ seg_map_suffix (str|None): Suffix of segmentation maps.
+ split (str|None): Split txt file. If split is specified, only the
+ files listed in it will be loaded. Otherwise, all images
+ in img_dir/ann_dir will be loaded. Default: None
+
+ Returns:
+ list[dict]: All image info of dataset.
+ """
+
+ img_infos = []
+ if split is not None:
+ with open(split) as f:
+ for line in f:
+ img_name = line.strip()
+ img_file = osp.join(img_dir, img_name + img_suffix)
+ img_info = dict(filename=img_file)
+ if ann_dir is not None:
+ seg_map = osp.join(ann_dir, img_name + seg_map_suffix)
+ img_info['ann'] = dict(seg_map=seg_map)
+ img_infos.append(img_info)
+ else:
+ for img in mmcv.scandir(img_dir, img_suffix, recursive=True):
+ img_file = osp.join(img_dir, img)
+ img_info = dict(filename=img_file)
+ if ann_dir is not None:
+ seg_map = osp.join(ann_dir,
+ img.replace(img_suffix, seg_map_suffix))
+ img_info['ann'] = dict(seg_map=seg_map)
+ img_infos.append(img_info)
+
+ print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger())
+ return img_infos
+
+ def get_ann_info(self, idx):
+ """Get annotation by index.
+
+ Args:
+ idx (int): Index of data.
+
+ Returns:
+ dict: Annotation info of specified index.
+ """
+
+ return self.img_infos[idx]['ann']
+
+ def pre_pipeline(self, results):
+ """Prepare results dict for pipeline."""
+ results['seg_fields'] = []
+
+ def __getitem__(self, idx):
+ """Get training/test data after pipeline.
+
+ Args:
+ idx (int): Index of data.
+
+ Returns:
+ dict: Training/test data (with annotation if `test_mode` is set
+ False).
+ """
+
+ if self.test_mode:
+ return self.prepare_test_img(idx)
+ else:
+ return self.prepare_train_img(idx)
+
+ def prepare_train_img(self, idx):
+ """Get training data and annotations after pipeline.
+
+ Args:
+ idx (int): Index of data.
+
+ Returns:
+ dict: Training data and annotation after pipeline with new keys
+ introduced by pipeline.
+ """
+
+ img_info = self.img_infos[idx]
+ ann_info = self.get_ann_info(idx)
+ results = dict(img_info=img_info, ann_info=ann_info)
+ self.pre_pipeline(results)
+ return self.pipeline(results)
+
+ def prepare_test_img(self, idx):
+ """Get testing data after pipeline.
+
+ Args:
+ idx (int): Index of data.
+
+ Returns:
+ dict: Testing data after pipeline with new keys introduced by
+ pipeline.
+ """
+
+ img_info = self.img_infos[idx]
+ results = dict(img_info=img_info)
+ self.pre_pipeline(results)
+ return self.pipeline(results)
+
+ def format_results(self, results, **kwargs):
+ """Place holder to format result to dataset specific output."""
+ pass
+
+ def get_gt_seg_maps(self):
+ """Get ground truth segmentation maps for evaluation."""
+ gt_seg_maps = []
+ for img_info in self.img_infos:
+ gt_seg_map = mmcv.imread(
+ img_info['ann']['seg_map'], flag='unchanged', backend='pillow')
+ if self.reduce_zero_label:
+ # avoid using underflow conversion
+ gt_seg_map[gt_seg_map == 0] = 255
+ gt_seg_map = gt_seg_map - 1
+ gt_seg_map[gt_seg_map == 254] = 255
+
+ gt_seg_maps.append(gt_seg_map)
+
+ return gt_seg_maps
+
+ def evaluate(self, results, metric='mIoU', logger=None, **kwargs):
+ """Evaluate the dataset.
+
+ Args:
+ results (list): Testing results of the dataset.
+ metric (str | list[str]): Metrics to be evaluated.
+ logger (logging.Logger | None | str): Logger used for printing
+ related information during evaluation. Default: None.
+
+ Returns:
+ dict[str, float]: Default metrics.
+ """
+
+ if not isinstance(metric, str):
+ assert len(metric) == 1
+ metric = metric[0]
+ allowed_metrics = ['mIoU']
+ if metric not in allowed_metrics:
+ raise KeyError('metric {} is not supported'.format(metric))
+
+ eval_results = {}
+ gt_seg_maps = self.get_gt_seg_maps()
+ if self.CLASSES is None:
+ num_classes = len(
+ reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps]))
+ else:
+ num_classes = len(self.CLASSES)
+
+ all_acc, acc, iou = mean_iou(
+ results, gt_seg_maps, num_classes, ignore_index=self.ignore_index)
+ summary_str = ''
+ summary_str += 'per class results:\n'
+
+ line_format = '{:<15} {:>10} {:>10}\n'
+ summary_str += line_format.format('Class', 'IoU', 'Acc')
+ if self.CLASSES is None:
+ class_names = tuple(range(num_classes))
+ else:
+ class_names = self.CLASSES
+ for i in range(num_classes):
+ iou_str = '{:.2f}'.format(iou[i] * 100)
+ acc_str = '{:.2f}'.format(acc[i] * 100)
+ summary_str += line_format.format(class_names[i], iou_str, acc_str)
+ summary_str += 'Summary:\n'
+ line_format = '{:<15} {:>10} {:>10} {:>10}\n'
+ summary_str += line_format.format('Scope', 'mIoU', 'mAcc', 'aAcc')
+
+ iou_str = '{:.2f}'.format(np.nanmean(iou) * 100)
+ acc_str = '{:.2f}'.format(np.nanmean(acc) * 100)
+ all_acc_str = '{:.2f}'.format(all_acc * 100)
+ summary_str += line_format.format('global', iou_str, acc_str,
+ all_acc_str)
+ print_log(summary_str, logger)
+
+ eval_results['mIoU'] = np.nanmean(iou)
+ eval_results['mAcc'] = np.nanmean(acc)
+ eval_results['aAcc'] = all_acc
+
+ return eval_results
diff --git a/mmseg/datasets/dataset_wrappers.py b/mmseg/datasets/dataset_wrappers.py
new file mode 100644
index 0000000000..d6a5e957ec
--- /dev/null
+++ b/mmseg/datasets/dataset_wrappers.py
@@ -0,0 +1,50 @@
+from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
+
+from .builder import DATASETS
+
+
+@DATASETS.register_module()
+class ConcatDataset(_ConcatDataset):
+ """A wrapper of concatenated dataset.
+
+ Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but also exposes
+ the ``CLASSES`` and ``PALETTE`` of the first dataset.
+
+ Args:
+ datasets (list[:obj:`Dataset`]): A list of datasets.
+ """
+
+ def __init__(self, datasets):
+ super(ConcatDataset, self).__init__(datasets)
+ self.CLASSES = datasets[0].CLASSES
+ self.PALETTE = datasets[0].PALETTE
+
+
+@DATASETS.register_module()
+class RepeatDataset(object):
+ """A wrapper of repeated dataset.
+
+ The length of the repeated dataset is `times` times that of the original
+ dataset. This is useful when the data loading time is long but the dataset
+ is small. Using RepeatDataset can reduce the data loading time between
+ epochs.
+
+ Args:
+ dataset (:obj:`Dataset`): The dataset to be repeated.
+ times (int): Repeat times.
+ """
+
+ def __init__(self, dataset, times):
+ self.dataset = dataset
+ self.times = times
+ self.CLASSES = dataset.CLASSES
+ self.PALETTE = dataset.PALETTE
+ self._ori_len = len(self.dataset)
+
+ def __getitem__(self, idx):
+ """Get item from original dataset."""
+ return self.dataset[idx % self._ori_len]
+
+ def __len__(self):
+ """The length is multiplied by ``times``"""
+ return self.times * self._ori_len
diff --git a/mmseg/datasets/pipelines/__init__.py b/mmseg/datasets/pipelines/__init__.py
new file mode 100644
index 0000000000..e45f495070
--- /dev/null
+++ b/mmseg/datasets/pipelines/__init__.py
@@ -0,0 +1,14 @@
+from .compose import Compose
+from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor,
+ Transpose, to_tensor)
+from .loading import LoadAnnotations, LoadImageFromFile
+from .test_time_aug import MultiScaleFlipAug
+from .transforms import (Normalize, Pad, PhotoMetricDistortion, RandomCrop,
+ RandomFlip, Resize, SegRescale)
+
+__all__ = [
+ 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
+ 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile',
+ 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop',
+ 'Normalize', 'SegRescale', 'PhotoMetricDistortion'
+]
diff --git a/mmseg/datasets/pipelines/compose.py b/mmseg/datasets/pipelines/compose.py
new file mode 100644
index 0000000000..ca48f1c935
--- /dev/null
+++ b/mmseg/datasets/pipelines/compose.py
@@ -0,0 +1,51 @@
+import collections
+
+from mmcv.utils import build_from_cfg
+
+from ..builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class Compose(object):
+ """Compose multiple transforms sequentially.
+
+ Args:
+ transforms (Sequence[dict | callable]): Sequence of transform object or
+ config dict to be composed.
+ """
+
+ def __init__(self, transforms):
+ assert isinstance(transforms, collections.abc.Sequence)
+ self.transforms = []
+ for transform in transforms:
+ if isinstance(transform, dict):
+ transform = build_from_cfg(transform, PIPELINES)
+ self.transforms.append(transform)
+ elif callable(transform):
+ self.transforms.append(transform)
+ else:
+ raise TypeError('transform must be callable or a dict')
+
+ def __call__(self, data):
+ """Call function to apply transforms sequentially.
+
+ Args:
+ data (dict): A result dict contains the data to transform.
+
+ Returns:
+ dict: Transformed data.
+ """
+
+ for t in self.transforms:
+ data = t(data)
+ if data is None:
+ return None
+ return data
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '('
+ for t in self.transforms:
+ format_string += '\n'
+ format_string += f' {t}'
+ format_string += '\n)'
+ return format_string
diff --git a/mmseg/datasets/pipelines/formating.py b/mmseg/datasets/pipelines/formating.py
new file mode 100644
index 0000000000..e7029a8bac
--- /dev/null
+++ b/mmseg/datasets/pipelines/formating.py
@@ -0,0 +1,288 @@
+from collections.abc import Sequence
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.parallel import DataContainer as DC
+
+from ..builder import PIPELINES
+
+
+def to_tensor(data):
+ """Convert objects of various python types to :obj:`torch.Tensor`.
+
+ Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
+ :class:`Sequence`, :class:`int` and :class:`float`.
+
+ Args:
+ data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to
+ be converted.
+ """
+
+ if isinstance(data, torch.Tensor):
+ return data
+ elif isinstance(data, np.ndarray):
+ return torch.from_numpy(data)
+ elif isinstance(data, Sequence) and not mmcv.is_str(data):
+ return torch.tensor(data)
+ elif isinstance(data, int):
+ return torch.LongTensor([data])
+ elif isinstance(data, float):
+ return torch.FloatTensor([data])
+ else:
+ raise TypeError(f'type {type(data)} cannot be converted to tensor.')
+
+
+@PIPELINES.register_module()
+class ToTensor(object):
+ """Convert some results to :obj:`torch.Tensor` by given keys.
+
+ Args:
+ keys (Sequence[str]): Keys that need to be converted to Tensor.
+ """
+
+ def __init__(self, keys):
+ self.keys = keys
+
+ def __call__(self, results):
+ """Call function to convert data in results to :obj:`torch.Tensor`.
+
+ Args:
+ results (dict): Result dict contains the data to convert.
+
+ Returns:
+ dict: The result dict contains the data converted
+ to :obj:`torch.Tensor`.
+ """
+
+ for key in self.keys:
+ results[key] = to_tensor(results[key])
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + f'(keys={self.keys})'
+
+
+@PIPELINES.register_module()
+class ImageToTensor(object):
+ """Convert image to :obj:`torch.Tensor` by given keys.
+
+ The dimension order of input image is (H, W, C). The pipeline will convert
+ it to (C, H, W). If only 2 dimensions (H, W) are given, the output would be
+ (1, H, W).
+
+ Args:
+ keys (Sequence[str]): Key of images to be converted to Tensor.
+ """
+
+ def __init__(self, keys):
+ self.keys = keys
+
+ def __call__(self, results):
+ """Call function to convert image in results to :obj:`torch.Tensor` and
+ transpose the channel order.
+
+ Args:
+ results (dict): Result dict contains the image data to convert.
+
+ Returns:
+ dict: The result dict contains the image converted
+ to :obj:`torch.Tensor` and transposed to (C, H, W) order.
+ """
+
+ for key in self.keys:
+ img = results[key]
+ if len(img.shape) < 3:
+ img = np.expand_dims(img, -1)
+ results[key] = to_tensor(img.transpose(2, 0, 1))
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + f'(keys={self.keys})'
+
+
+@PIPELINES.register_module()
+class Transpose(object):
+ """Transpose some results by given keys.
+
+ Args:
+ keys (Sequence[str]): Keys of results to be transposed.
+ order (Sequence[int]): Order of transpose.
+ """
+
+ def __init__(self, keys, order):
+ self.keys = keys
+ self.order = order
+
+ def __call__(self, results):
+ """Call function to transpose the data in results by the given keys
+ using ``self.order``.
+
+ Args:
+ results (dict): Result dict contains the data to transpose.
+
+ Returns:
+ dict: The result dict with the values of ``self.keys``
+ transposed according to ``self.order``.
+ """
+
+ for key in self.keys:
+ results[key] = results[key].transpose(self.order)
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + \
+ f'(keys={self.keys}, order={self.order})'
+
+
+@PIPELINES.register_module()
+class ToDataContainer(object):
+ """Convert results to :obj:`mmcv.DataContainer` by given fields.
+
+ Args:
+ fields (Sequence[dict]): Each field is a dict like
+ ``dict(key='xxx', **kwargs)``. The ``key`` in result will
+ be converted to :obj:`mmcv.DataContainer` with ``**kwargs``.
+ Default: ``(dict(key='img', stack=True),
+ dict(key='gt_semantic_seg'))``.
+ """
+
+ def __init__(self,
+ fields=(dict(key='img',
+ stack=True), dict(key='gt_semantic_seg'))):
+ self.fields = fields
+
+ def __call__(self, results):
+ """Call function to convert data in results to
+ :obj:`mmcv.DataContainer`.
+
+ Args:
+ results (dict): Result dict contains the data to convert.
+
+ Returns:
+ dict: The result dict contains the data converted to
+ :obj:`mmcv.DataContainer`.
+ """
+
+ for field in self.fields:
+ field = field.copy()
+ key = field.pop('key')
+ results[key] = DC(results[key], **field)
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + f'(fields={self.fields})'
+
+
+@PIPELINES.register_module()
+class DefaultFormatBundle(object):
+ """Default formatting bundle.
+
+ It simplifies the pipeline of formatting common fields, including "img"
+ and "gt_semantic_seg". These fields are formatted as follows.
+
+ - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
+ - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
+ (3)to DataContainer (stack=True)
+ """
+
+ def __call__(self, results):
+ """Call function to transform and format common fields in results.
+
+ Args:
+ results (dict): Result dict contains the data to convert.
+
+ Returns:
+ dict: The result dict contains the data that is formatted with
+ default bundle.
+ """
+
+ if 'img' in results:
+ img = results['img']
+ if len(img.shape) < 3:
+ img = np.expand_dims(img, -1)
+ img = np.ascontiguousarray(img.transpose(2, 0, 1))
+ results['img'] = DC(to_tensor(img), stack=True)
+ if 'gt_semantic_seg' in results:
+ # convert to long
+ results['gt_semantic_seg'] = DC(
+ to_tensor(results['gt_semantic_seg'][None,
+ ...].astype(np.int64)),
+ stack=True)
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__
+
+
+@PIPELINES.register_module()
+class Collect(object):
+ """Collect data from the loader relevant to the specific task.
+
+ This is usually the last stage of the data loader pipeline. Typically keys
+ is set to some subset of "img", "gt_semantic_seg".
+
+ The "img_metas" item is always populated. The contents of the "img_metas"
+ dictionary depend on "meta_keys". By default this includes:
+
+ - "img_shape": shape of the image input to the network as a tuple
+ (h, w, c). Note that images may be zero padded on the bottom/right
+ if the batch tensor is larger than this shape.
+
+ - "scale_factor": a float indicating the preprocessing scale
+
+ - "flip": a boolean indicating if image flip transform was used
+
+ - "filename": path to the image file
+
+ - "ori_shape": original shape of the image as a tuple (h, w, c)
+
+ - "pad_shape": image shape after padding
+
+ - "img_norm_cfg": a dict of normalization information:
+ - mean - per channel mean subtraction
+ - std - per channel std divisor
+ - to_rgb - bool indicating if bgr was converted to rgb
+
+ Args:
+ keys (Sequence[str]): Keys of results to be collected in ``data``.
+ meta_keys (Sequence[str], optional): Meta keys to be converted to
+ ``mmcv.DataContainer`` and collected in ``data[img_metas]``.
+ Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape',
+ 'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+ 'img_norm_cfg')``
+ """
+
+ def __init__(self,
+ keys,
+ meta_keys=('filename', 'ori_filename', 'ori_shape',
+ 'img_shape', 'pad_shape', 'scale_factor', 'flip',
+ 'flip_direction', 'img_norm_cfg')):
+ self.keys = keys
+ self.meta_keys = meta_keys
+
+ def __call__(self, results):
+ """Call function to collect keys in results. The keys in ``meta_keys``
+ will be converted to :obj:`mmcv.DataContainer`.
+
+ Args:
+ results (dict): Result dict contains the data to collect.
+
+ Returns:
+ dict: The result dict contains the following keys
+ - keys in ``self.keys``
+ - ``img_metas``
+ """
+
+ data = {}
+ img_meta = {}
+ for key in self.meta_keys:
+ img_meta[key] = results[key]
+ data['img_metas'] = DC(img_meta, cpu_only=True)
+ for key in self.keys:
+ data[key] = results[key]
+ return data
+
+ def __repr__(self):
+ return self.__class__.__name__ + \
+ f'(keys={self.keys}, meta_keys={self.meta_keys})'
diff --git a/mmseg/datasets/pipelines/loading.py b/mmseg/datasets/pipelines/loading.py
new file mode 100644
index 0000000000..9786269106
--- /dev/null
+++ b/mmseg/datasets/pipelines/loading.py
@@ -0,0 +1,149 @@
+import os.path as osp
+
+import mmcv
+import numpy as np
+
+from ..builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class LoadImageFromFile(object):
+ """Load an image from file.
+
+ Required key is "img_info" (a dict containing the key "filename"); the key
+ "img_prefix" is optional. Added or updated keys are "filename",
+ "ori_filename", "img", "img_shape", "ori_shape", "pad_shape" (both same as
+ `img_shape`), "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1).
+
+ Args:
+ to_float32 (bool): Whether to convert the loaded image to a float32
+ numpy array. If set to False, the loaded image is an uint8 array.
+ Defaults to False.
+ color_type (str): The flag argument for :func:`mmcv.imfrombytes`.
+ Defaults to 'color'.
+ file_client_args (dict): Arguments to instantiate a FileClient.
+ See :class:`mmcv.fileio.FileClient` for details.
+ Defaults to ``dict(backend='disk')``.
+ imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default:
+ 'cv2'
+ """
+
+ def __init__(self,
+ to_float32=False,
+ color_type='color',
+ file_client_args=dict(backend='disk'),
+ imdecode_backend='cv2'):
+ self.to_float32 = to_float32
+ self.color_type = color_type
+ self.file_client_args = file_client_args.copy()
+ self.file_client = None
+ self.imdecode_backend = imdecode_backend
+
+ def __call__(self, results):
+ """Call functions to load image and get image meta information.
+
+ Args:
+ results (dict): Result dict from :obj:`mmseg.CustomDataset`.
+
+ Returns:
+ dict: The dict contains loaded image and meta information.
+ """
+
+ if self.file_client is None:
+ self.file_client = mmcv.FileClient(**self.file_client_args)
+
+ if results.get('img_prefix') is not None:
+ filename = osp.join(results['img_prefix'],
+ results['img_info']['filename'])
+ else:
+ filename = results['img_info']['filename']
+ img_bytes = self.file_client.get(filename)
+ img = mmcv.imfrombytes(
+ img_bytes, flag=self.color_type, backend=self.imdecode_backend)
+ if self.to_float32:
+ img = img.astype(np.float32)
+
+ results['filename'] = filename
+ results['ori_filename'] = results['img_info']['filename']
+ results['img'] = img
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ # Set initial values for default meta_keys
+ results['pad_shape'] = img.shape
+ results['scale_factor'] = 1.0
+ num_channels = 1 if len(img.shape) < 3 else img.shape[2]
+ results['img_norm_cfg'] = dict(
+ mean=np.zeros(num_channels, dtype=np.float32),
+ std=np.ones(num_channels, dtype=np.float32),
+ to_rgb=False)
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += f'(to_float32={self.to_float32},'
+ repr_str += f"color_type='{self.color_type}',"
+ repr_str += f"imdecode_backend='{self.imdecode_backend}')"
+ return repr_str
+
+
+@PIPELINES.register_module()
+class LoadAnnotations(object):
+ """Load annotations for semantic segmentation.
+
+ Args:
+ reduce_zero_label (bool): Whether to reduce all label values by 1.
+ Usually used for datasets where 0 is the background label.
+ Default: False.
+ file_client_args (dict): Arguments to instantiate a FileClient.
+ See :class:`mmcv.fileio.FileClient` for details.
+ Defaults to ``dict(backend='disk')``.
+ imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default:
+ 'pillow'
+ """
+
+ def __init__(self,
+ reduce_zero_label=False,
+ file_client_args=dict(backend='disk'),
+ imdecode_backend='pillow'):
+ self.reduce_zero_label = reduce_zero_label
+ self.file_client_args = file_client_args.copy()
+ self.file_client = None
+ self.imdecode_backend = imdecode_backend
+
+ def __call__(self, results):
+ """Call function to load semantic segmentation annotations.
+
+ Args:
+ results (dict): Result dict from :obj:`mmseg.CustomDataset`.
+
+ Returns:
+ dict: The dict contains loaded semantic segmentation annotations.
+ """
+
+ if self.file_client is None:
+ self.file_client = mmcv.FileClient(**self.file_client_args)
+
+ if results.get('seg_prefix', None) is not None:
+ filename = osp.join(results['seg_prefix'],
+ results['ann_info']['seg_map'])
+ else:
+ filename = results['ann_info']['seg_map']
+ img_bytes = self.file_client.get(filename)
+ gt_semantic_seg = mmcv.imfrombytes(
+ img_bytes, flag='unchanged',
+ backend=self.imdecode_backend).squeeze().astype(np.uint8)
+ # reduce zero_label
+ if self.reduce_zero_label:
+ # avoid using underflow conversion
+ gt_semantic_seg[gt_semantic_seg == 0] = 255
+ gt_semantic_seg = gt_semantic_seg - 1
+ gt_semantic_seg[gt_semantic_seg == 254] = 255
+ results['gt_semantic_seg'] = gt_semantic_seg
+ results['seg_fields'].append('gt_semantic_seg')
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += f'(reduce_zero_label={self.reduce_zero_label},'
+ repr_str += f"imdecode_backend='{self.imdecode_backend}')"
+ return repr_str
diff --git a/mmseg/datasets/pipelines/test_time_aug.py b/mmseg/datasets/pipelines/test_time_aug.py
new file mode 100644
index 0000000000..5712c79d58
--- /dev/null
+++ b/mmseg/datasets/pipelines/test_time_aug.py
@@ -0,0 +1,120 @@
+import warnings
+
+import mmcv
+
+from ..builder import PIPELINES
+from .compose import Compose
+
+
+@PIPELINES.register_module()
+class MultiScaleFlipAug(object):
+ """Test-time augmentation with multiple scales and flipping.
+
+ An example configuration is as follows:
+
+ .. code-block::
+
+ img_scale=(2048, 1024),
+ img_ratios=[0.5, 1.0],
+ flip=True,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ]
+
+ After MultiScaleFlipAug with above configuration, the results are wrapped
+ into lists of the same length as follows:
+
+ .. code-block::
+
+ dict(
+ img=[...],
+ img_shape=[...],
+ scale=[(1024, 512), (1024, 512), (2048, 1024), (2048, 1024)]
+ flip=[False, True, False, True]
+ ...
+ )
+
+ Args:
+ transforms (list[dict]): Transforms to apply in each augmentation.
+ img_scale (tuple | list[tuple]): Images scales for resizing.
+ img_ratios (float | list[float]): Image ratios for resizing
+ flip (bool): Whether apply flip augmentation. Default: False.
+ flip_direction (str | list[str]): Flip augmentation directions,
+ options are "horizontal" and "vertical". If flip_direction is list,
+ multiple flip augmentations will be applied.
+ It has no effect when flip == False. Default: "horizontal".
+ """
+
+ def __init__(self,
+ transforms,
+ img_scale,
+ img_ratios=None,
+ flip=False,
+ flip_direction='horizontal'):
+ self.transforms = Compose(transforms)
+ if img_ratios is not None:
+ # mode 1: given a scale and a range of image ratio
+ img_ratios = img_ratios if isinstance(img_ratios,
+ list) else [img_ratios]
+ assert mmcv.is_list_of(img_ratios, float)
+ assert isinstance(img_scale, tuple) and len(img_scale) == 2
+ self.img_scale = [(int(img_scale[0] * ratio),
+ int(img_scale[1] * ratio))
+ for ratio in img_ratios]
+ else:
+ # mode 2: given multiple scales
+ self.img_scale = img_scale if isinstance(img_scale,
+ list) else [img_scale]
+ assert mmcv.is_list_of(self.img_scale, tuple)
+ self.flip = flip
+ self.flip_direction = flip_direction if isinstance(
+ flip_direction, list) else [flip_direction]
+ assert mmcv.is_list_of(self.flip_direction, str)
+ if not self.flip and self.flip_direction != ['horizontal']:
+ warnings.warn(
+ 'flip_direction has no effect when flip is set to False')
+ if (self.flip
+ and not any([t['type'] == 'RandomFlip' for t in transforms])):
+ warnings.warn(
+ 'flip has no effect when RandomFlip is not in transforms')
+
+ def __call__(self, results):
+ """Call function to apply test time augment transforms on results.
+
+ Args:
+ results (dict): Result dict contains the data to transform.
+
+ Returns:
+ dict[str, list]: The augmented data, where each value is wrapped
+ into a list.
+ """
+
+ aug_data = []
+ flip_aug = [False, True] if self.flip else [False]
+ for scale in self.img_scale:
+ for flip in flip_aug:
+ for direction in self.flip_direction:
+ _results = results.copy()
+ _results['scale'] = scale
+ _results['flip'] = flip
+ _results['flip_direction'] = direction
+ data = self.transforms(_results)
+ aug_data.append(data)
+ # list of dict to dict of list
+ aug_data_dict = {key: [] for key in aug_data[0]}
+ for data in aug_data:
+ for key, val in data.items():
+ aug_data_dict[key].append(val)
+ return aug_data_dict
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += f'(transforms={self.transforms}, '
+ repr_str += f'img_scale={self.img_scale}, flip={self.flip})'
+ repr_str += f'flip_direction={self.flip_direction}'
+ return repr_str
diff --git a/mmseg/datasets/pipelines/transforms.py b/mmseg/datasets/pipelines/transforms.py
new file mode 100644
index 0000000000..b683973ca2
--- /dev/null
+++ b/mmseg/datasets/pipelines/transforms.py
@@ -0,0 +1,610 @@
+import mmcv
+import numpy as np
+from numpy import random
+
+from ..builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class Resize(object):
+    """Resize images & seg.
+
+    This transform resizes the input image to some scale. If the input dict
+    contains the key "scale", then the scale in the input dict is used,
+    otherwise the specified scale in the init method is used.
+
+    ``img_scale`` can either be a tuple (single-scale) or a list of tuple
+    (multi-scale). There are 3 multiscale modes:
+    - ``ratio_range is not None``: randomly sample a ratio from the ratio range
+        and multiply it with the image scale.
+    - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a
+        scale from a range.
+    - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a
+        scale from multiple scales.
+
+    Args:
+        img_scale (tuple or list[tuple]): Images scales for resizing.
+        multiscale_mode (str): Either "range" or "value".
+        ratio_range (tuple[float]): (min_ratio, max_ratio)
+        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
+            image.
+    """
+
+    def __init__(self,
+                 img_scale=None,
+                 multiscale_mode='range',
+                 ratio_range=None,
+                 keep_ratio=True):
+        if img_scale is None:
+            self.img_scale = None
+        else:
+            if isinstance(img_scale, list):
+                self.img_scale = img_scale
+            else:
+                self.img_scale = [img_scale]
+            assert mmcv.is_list_of(self.img_scale, tuple)
+
+        if ratio_range is not None:
+            # mode 1: given a scale and a range of image ratio
+            assert len(self.img_scale) == 1
+        else:
+            # mode 2: given multiple scales or a range of scales
+            assert multiscale_mode in ['value', 'range']
+
+        self.multiscale_mode = multiscale_mode
+        self.ratio_range = ratio_range
+        self.keep_ratio = keep_ratio
+
+    @staticmethod
+    def random_select(img_scales):
+        """Randomly select an img_scale from given candidates.
+
+        Args:
+            img_scales (list[tuple]): Images scales for selection.
+
+        Returns:
+            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``,
+                where ``img_scale`` is the selected image scale and
+                ``scale_idx`` is the selected index in the given candidates.
+        """
+
+        assert mmcv.is_list_of(img_scales, tuple)
+        scale_idx = np.random.randint(len(img_scales))
+        img_scale = img_scales[scale_idx]
+        return img_scale, scale_idx
+
+    @staticmethod
+    def random_sample(img_scales):
+        """Randomly sample an img_scale when ``multiscale_mode=='range'``.
+
+        Args:
+            img_scales (list[tuple]): Images scale range for sampling.
+                There must be two tuples in img_scales, which specify the lower
+                and upper bound of image scales.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(img_scale, None)``, where
+                ``img_scale`` is sampled scale and None is just a placeholder
+                to be consistent with :func:`random_select`.
+        """
+
+        assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
+        img_scale_long = [max(s) for s in img_scales]
+        img_scale_short = [min(s) for s in img_scales]
+        long_edge = np.random.randint(
+            min(img_scale_long),
+            max(img_scale_long) + 1)
+        short_edge = np.random.randint(
+            min(img_scale_short),
+            max(img_scale_short) + 1)
+        img_scale = (long_edge, short_edge)
+        return img_scale, None
+
+    @staticmethod
+    def random_sample_ratio(img_scale, ratio_range):
+        """Randomly sample an img_scale when ``ratio_range`` is specified.
+
+        A ratio will be randomly sampled from the range specified by
+        ``ratio_range``. Then it would be multiplied with ``img_scale`` to
+        generate sampled scale.
+
+        Args:
+            img_scale (tuple): Images scale base to multiply with ratio.
+            ratio_range (tuple[float]): The minimum and maximum ratio to scale
+                the ``img_scale``.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(scale, None)``, where
+                ``scale`` is sampled ratio multiplied with ``img_scale`` and
+                None is just a placeholder to be consistent with
+                :func:`random_select`.
+        """
+
+        assert isinstance(img_scale, tuple) and len(img_scale) == 2
+        min_ratio, max_ratio = ratio_range
+        assert min_ratio <= max_ratio
+        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
+        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
+        return scale, None
+
+    def _random_scale(self, results):
+        """Randomly sample an img_scale according to ``ratio_range`` and
+        ``multiscale_mode``.
+
+        If ``ratio_range`` is specified, a ratio will be sampled and be
+        multiplied with ``img_scale``.
+        If multiple scales are specified by ``img_scale``, a scale will be
+        sampled according to ``multiscale_mode``.
+        Otherwise, single scale will be used.
+
+        Args:
+            results (dict): Result dict from :obj:`dataset`.
+
+        Returns:
+            None: Two new keys ``scale`` and ``scale_idx`` are added into
+                ``results`` in place, for use by subsequent pipelines.
+        """
+
+        if self.ratio_range is not None:
+            scale, scale_idx = self.random_sample_ratio(
+                self.img_scale[0], self.ratio_range)
+        elif len(self.img_scale) == 1:
+            scale, scale_idx = self.img_scale[0], 0
+        elif self.multiscale_mode == 'range':
+            scale, scale_idx = self.random_sample(self.img_scale)
+        elif self.multiscale_mode == 'value':
+            scale, scale_idx = self.random_select(self.img_scale)
+        else:
+            raise NotImplementedError
+
+        results['scale'] = scale
+        results['scale_idx'] = scale_idx
+
+    def _resize_img(self, results):
+        """Resize images with ``results['scale']``."""
+        if self.keep_ratio:
+            img, scale_factor = mmcv.imrescale(
+                results['img'], results['scale'], return_scale=True)
+            # the w_scale and h_scale has minor difference
+            # a real fix should be done in the mmcv.imrescale in the future
+            new_h, new_w = img.shape[:2]
+            h, w = results['img'].shape[:2]
+            w_scale = new_w / w
+            h_scale = new_h / h
+        else:
+            img, w_scale, h_scale = mmcv.imresize(
+                results['img'], results['scale'], return_scale=True)
+        scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
+                                dtype=np.float32)
+        results['img'] = img
+        results['img_shape'] = img.shape
+        results['pad_shape'] = img.shape  # in case that there is no padding
+        results['scale_factor'] = scale_factor
+        results['keep_ratio'] = self.keep_ratio
+
+    def _resize_seg(self, results):
+        """Resize every ``seg_fields`` map and store it back under its key."""
+        for key in results.get('seg_fields', []):
+            if self.keep_ratio:
+                gt_seg = mmcv.imrescale(
+                    results[key], results['scale'], interpolation='nearest')
+            else:
+                gt_seg = mmcv.imresize(
+                    results[key], results['scale'], interpolation='nearest')
+            results[key] = gt_seg
+
+    def __call__(self, results):
+        """Call function to resize the image and the semantic segmentation
+        maps.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor',
+                'keep_ratio' keys are added into result dict.
+        """
+
+        if 'scale' not in results:
+            self._random_scale(results)
+        self._resize_img(results)
+        self._resize_seg(results)
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += (f'(img_scale={self.img_scale}, '
+                     f'multiscale_mode={self.multiscale_mode}, '
+                     f'ratio_range={self.ratio_range}, '
+                     f'keep_ratio={self.keep_ratio})')
+        return repr_str
+
+
+@PIPELINES.register_module()
+class RandomFlip(object):
+    """Flip the image & seg.
+
+    If the input dict contains the key "flip", then the flag will be used,
+    otherwise it will be randomly decided by a ratio specified in the init
+    method.
+
+    Args:
+        flip_ratio (float, optional): The flipping probability. ``None`` means
+            the image is never flipped at random. Default: None.
+        direction(str, optional): 'horizontal' (default) or 'vertical'.
+    """
+
+    def __init__(self, flip_ratio=None, direction='horizontal'):
+        self.flip_ratio = flip_ratio
+        self.direction = direction
+        if flip_ratio is not None:
+            assert flip_ratio >= 0 and flip_ratio <= 1
+        assert direction in ['horizontal', 'vertical']
+
+    def __call__(self, results):
+        """Call function to flip the image and the semantic segmentation
+        maps.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Flipped results, 'flip', 'flip_direction' keys are added into
+                result dict.
+        """
+        if 'flip' not in results:
+            # a missing ratio acts as 0, since ``float < None`` raises
+            ratio = 0 if self.flip_ratio is None else self.flip_ratio
+            results['flip'] = bool(np.random.rand() < ratio)
+        if 'flip_direction' not in results:
+            results['flip_direction'] = self.direction
+        if results['flip']:
+            # flip image
+            results['img'] = mmcv.imflip(
+                results['img'], direction=results['flip_direction'])
+
+            # flip segs
+            for key in results.get('seg_fields', []):
+                # use copy() to make numpy stride positive
+                results[key] = mmcv.imflip(
+                    results[key], direction=results['flip_direction']).copy()
+        return results
+
+    def __repr__(self):
+        return self.__class__.__name__ + f'(flip_ratio={self.flip_ratio})'
+
+
+@PIPELINES.register_module()
+class Pad(object):
+    """Pad the image and every segmentation map.
+
+    Two padding modes exist: (1) padding to the fixed shape ``size`` and (2)
+    padding up to the minimum size that is divisible by ``size_divisor``.
+    The keys "pad_shape", "pad_fixed_size", "pad_size_divisor" are added.
+
+    Args:
+        size (tuple, optional): Fixed padding size.
+        size_divisor (int, optional): The divisor of padded size.
+        pad_val (float, optional): Padding value. Default: 0.
+        seg_pad_val (float, optional): Padding value of segmentation map.
+            Default: 255.
+    """
+
+    def __init__(self,
+                 size=None,
+                 size_divisor=None,
+                 pad_val=0,
+                 seg_pad_val=255):
+        self.size, self.size_divisor = size, size_divisor
+        self.pad_val, self.seg_pad_val = pad_val, seg_pad_val
+        # exactly one of ``size`` and ``size_divisor`` has to be given:
+        # at least one of them must be set ...
+        assert not (size is None and size_divisor is None)
+        # ... and they must not both be set
+        assert not (size is not None and size_divisor is not None)
+
+    def _pad_img(self, results):
+        """Pad ``results['img']`` and record the padding meta information."""
+        if self.size is not None:
+            padded = mmcv.impad(
+                results['img'], shape=self.size, pad_val=self.pad_val)
+        elif self.size_divisor is not None:
+            padded = mmcv.impad_to_multiple(
+                results['img'], self.size_divisor, pad_val=self.pad_val)
+        # keys are written in the order img, pad_shape, fixed size, divisor
+        results.update(
+            img=padded, pad_shape=padded.shape, pad_fixed_size=self.size,
+            pad_size_divisor=self.size_divisor)
+
+    def _pad_seg(self, results):
+        """Pad every ``seg_fields`` map to ``results['pad_shape']``."""
+        for seg_key in results.get('seg_fields', []):
+            seg_map = results[seg_key]
+            target_hw = results['pad_shape'][:2]
+            results[seg_key] = mmcv.impad(
+                seg_map, shape=target_hw, pad_val=self.seg_pad_val)
+
+    def __call__(self, results):
+        """Call function to pad the image and the segmentation maps.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Updated result dict.
+        """
+
+        for pad_step in (self._pad_img, self._pad_seg):
+            pad_step(results)
+        return results
+
+    def __repr__(self):
+        cls_name = self.__class__.__name__
+        return (f'{cls_name}(size={self.size}, '
+                f'size_divisor={self.size_divisor}, '
+                f'pad_val={self.pad_val})')
+
+
+@PIPELINES.register_module()
+class Normalize(object):
+    """Normalize the image with a per-channel mean and std.
+
+    The key "img_norm_cfg" is added to the result dict.
+
+    Args:
+        mean (sequence): Mean values of 3 channels.
+        std (sequence): Std values of 3 channels.
+        to_rgb (bool): Whether to convert the image from BGR to RGB,
+            default is true.
+    """
+
+    def __init__(self, mean, std, to_rgb=True):
+        self.mean, self.std = (
+            np.array(stat, dtype=np.float32) for stat in (mean, std))
+        self.to_rgb = to_rgb
+
+    def __call__(self, results):
+        """Call function to normalize images.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Normalized results, 'img_norm_cfg' key is added into
+                result dict.
+        """
+
+        mean, std, to_rgb = self.mean, self.std, self.to_rgb
+        results['img'] = mmcv.imnormalize(results['img'], mean, std, to_rgb)
+        # record the normalization settings next to the image
+        results['img_norm_cfg'] = dict(mean=mean, std=std, to_rgb=to_rgb)
+        return results
+
+    def __repr__(self):
+        cls_name = self.__class__.__name__
+        settings = f'mean={self.mean}, std={self.std}, to_rgb={self.to_rgb}'
+        # rendered as ClassName(mean=..., std=..., to_rgb=...)
+        return f'{cls_name}({settings})'
+
+
+@PIPELINES.register_module()
+class RandomCrop(object):
+    """Random crop the image & seg.
+
+    Args:
+        crop_size (tuple): Expected size after cropping, (h, w).
+        cat_max_ratio (float): Max share one category may take in a crop.
+        ignore_index (int): Label left out of that share. Default: 255.
+    """
+
+    def __init__(self, crop_size, cat_max_ratio=1., ignore_index=255):
+        # both crop edges have to be positive
+        assert crop_size[0] > 0 and crop_size[1] > 0
+        self.crop_size = crop_size
+        self.cat_max_ratio, self.ignore_index = cat_max_ratio, ignore_index
+
+    def get_crop_bbox(self, img):
+        """Randomly get a crop bounding box ``(y1, y2, x1, x2)``."""
+        crop_h, crop_w = self.crop_size[0], self.crop_size[1]
+        img_h, img_w = img.shape[0], img.shape[1]
+        # the margin is 0 when the image is not larger than the crop
+        top = np.random.randint(0, max(img_h - crop_h, 0) + 1)
+        left = np.random.randint(0, max(img_w - crop_w, 0) + 1)
+        bottom, right = top + crop_h, left + crop_w
+
+        return top, bottom, left, right
+
+    def crop(self, img, crop_bbox):
+        """Return the ``(y1, y2, x1, x2)`` window of ``img``."""
+        top, bottom, left, right = crop_bbox
+        window = (slice(top, bottom), slice(left, right), Ellipsis)
+        return img[window]
+
+    def __call__(self, results):
+        """Call function to randomly crop images, semantic segmentation maps.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Randomly cropped results, 'img_shape' key in result dict is
+                updated according to crop size.
+        """
+
+        image = results['img']
+        bbox = self.get_crop_bbox(image)
+        if self.cat_max_ratio < 1.:
+            # up to 10 attempts; a fresh box is drawn after each failed one
+            for _ in range(10):
+                seg_crop = self.crop(results['gt_semantic_seg'], bbox)
+                labels, counts = np.unique(seg_crop, return_counts=True)
+                counts = counts[labels != self.ignore_index]
+                if len(counts) > 1:
+                    if np.max(counts) / np.sum(counts) < self.cat_max_ratio:
+                        break
+                bbox = self.get_crop_bbox(image)
+
+        # crop the image
+        cropped = self.crop(image, bbox)
+        results['img'] = cropped
+        results['img_shape'] = cropped.shape
+        # every segmentation map below is cut with the very same box
+
+        # crop semantic seg
+        for seg_key in results.get('seg_fields', []):
+            results[seg_key] = self.crop(results[seg_key], bbox)
+
+        return results
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}(crop_size={self.crop_size})'
+
+
+@PIPELINES.register_module()
+class SegRescale(object):
+    """Rescale semantic segmentation maps by a fixed factor.
+
+    Args:
+        scale_factor (float): The scale factor of the final output.
+    """
+
+    def __init__(self, scale_factor=1):
+        self.scale_factor = scale_factor
+
+    def __call__(self, results):
+        """Call function to scale the semantic segmentation map.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Result dict with semantic segmentation map scaled.
+        """
+        keys = results.get('seg_fields', []) if self.scale_factor != 1 else []
+        for key in keys:
+            results[key] = mmcv.imrescale(
+                results[key], self.scale_factor, interpolation='nearest')
+        return results
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}(scale_factor={self.scale_factor})'
+
+
+@PIPELINES.register_module()
+class PhotoMetricDistortion(object):
+    """Apply photometric distortion to image sequentially, every transformation
+    is applied with a probability of 0.5. Random contrast is tried either
+    right after random brightness or as the very last step.
+
+    1. random brightness
+    2. random contrast (only if the randomly drawn mode is 1)
+    3. random saturation, done in HSV space (BGR -> HSV -> BGR)
+    4. random hue, done in HSV space (BGR -> HSV -> BGR)
+    5. random contrast (only if the randomly drawn mode is 0)
+
+    Brightness, contrast and saturation results are clipped to [0, 255];
+    shifted hue values are wrapped modulo 180.
+
+    Args:
+        brightness_delta (int): delta of brightness.
+        contrast_range (tuple): range of contrast.
+        saturation_range (tuple): range of saturation.
+        hue_delta (int): delta of hue.
+    """
+
+    def __init__(self,
+                 brightness_delta=32,
+                 contrast_range=(0.5, 1.5),
+                 saturation_range=(0.5, 1.5),
+                 hue_delta=18):
+        self.brightness_delta = brightness_delta
+        (self.contrast_lower, self.contrast_upper) = contrast_range
+        (self.saturation_lower, self.saturation_upper) = saturation_range
+        self.hue_delta = hue_delta
+
+    def convert(self, img, alpha=1, beta=0):
+        """Multiply with alpha and add beta, then clip to [0, 255]."""
+        scaled = img.astype(np.float32) * alpha + beta
+        clipped = np.clip(scaled, 0, 255)
+        return clipped.astype(np.uint8)
+
+    def brightness(self, img):
+        """Brightness distortion."""
+        if not random.randint(2):
+            # coin flip came up 0: leave the image untouched
+            return img
+        delta = self.brightness_delta
+        shift = random.uniform(-delta, delta)
+        return self.convert(img, beta=shift)
+
+    def contrast(self, img):
+        """Contrast distortion."""
+        if not random.randint(2):
+            return img
+        gain = random.uniform(self.contrast_lower, self.contrast_upper)
+        # a purely multiplicative change, clipped by ``convert``
+        return self.convert(img, alpha=gain)
+
+    def saturation(self, img):
+        """Saturation distortion."""
+        if not random.randint(2):
+            # coin flip came up 0: no saturation change
+            return img
+        hsv = mmcv.bgr2hsv(img)
+        # scale only the S channel (index 1)
+        gain = random.uniform(self.saturation_lower, self.saturation_upper)
+        hsv[:, :, 1] = self.convert(hsv[:, :, 1], alpha=gain)
+        return mmcv.hsv2bgr(hsv)
+
+    def hue(self, img):
+        """Hue distortion."""
+        if not random.randint(2):
+            return img
+        hsv = mmcv.bgr2hsv(img)
+        # shift the H channel (index 0) and wrap it modulo 180
+        shift = random.randint(-self.hue_delta, self.hue_delta)
+        hsv[:, :, 0] = (hsv[:, :, 0].astype(int) + shift) % 180
+        return mmcv.hsv2bgr(hsv)
+
+    def __call__(self, results):
+        """Call function to perform photometric distortion on images.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Result dict with images distorted.
+        """
+
+        # random brightness is always tried first
+        distorted = self.brightness(results['img'])
+
+        # the drawn mode decides where random contrast goes:
+        # mode == 1 --> right after brightness
+        # mode == 0 --> after saturation and hue
+        contrast_first = random.randint(2) == 1
+        if contrast_first:
+            distorted = self.contrast(distorted)
+
+        # random saturation
+        distorted = self.saturation(distorted)
+
+        # random hue
+        distorted = self.hue(distorted)
+
+        # random contrast (late position)
+        if not contrast_first:
+            distorted = self.contrast(distorted)
+
+        results['img'] = distorted
+        return results
+
+    def __repr__(self):
+        contrast = f'({self.contrast_lower}, {self.contrast_upper})'
+        saturation = f'({self.saturation_lower}, {self.saturation_upper})'
+        fields = (f'brightness_delta={self.brightness_delta}',
+                  f'contrast_range={contrast}',
+                  f'saturation_range={saturation}',
+                  f'hue_delta={self.hue_delta}')
+        joined = ', '.join(fields)
+        return f'{self.__class__.__name__}({joined})'
diff --git a/mmseg/datasets/voc.py b/mmseg/datasets/voc.py
new file mode 100644
index 0000000000..a8855203b1
--- /dev/null
+++ b/mmseg/datasets/voc.py
@@ -0,0 +1,29 @@
+import os.path as osp
+
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
+@DATASETS.register_module()
+class PascalVOCDataset(CustomDataset):
+    """Pascal VOC dataset with '.jpg' images and '.png' segmentation maps.
+
+    Args:
+        split (str): Split txt file for Pascal VOC.
+    """
+
+    CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
+               'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
+               'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
+               'train', 'tvmonitor')
+
+    PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128],
+               [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0],
+               [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128],
+               [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0],
+               [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]]
+
+    def __init__(self, split, **kwargs):
+        voc_suffixes = dict(img_suffix='.jpg', seg_map_suffix='.png')
+        super().__init__(split=split, **voc_suffixes, **kwargs)
+        assert osp.exists(self.img_dir) and self.split is not None
diff --git a/mmseg/models/__init__.py b/mmseg/models/__init__.py
new file mode 100644
index 0000000000..d492a2324f
--- /dev/null
+++ b/mmseg/models/__init__.py
@@ -0,0 +1,11 @@
+from .backbones import * # noqa: F401,F403
+from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone,
+ build_head, build_loss, build_segmentor)
+from .decode_heads import * # noqa: F401,F403
+from .losses import * # noqa: F401,F403
+from .segmentors import * # noqa: F401,F403
+
+__all__ = [
+ 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone',
+ 'build_head', 'build_loss', 'build_segmentor'
+]
diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py
new file mode 100644
index 0000000000..367b398ce8
--- /dev/null
+++ b/mmseg/models/backbones/__init__.py
@@ -0,0 +1,5 @@
+from .hrnet import HRNet
+from .resnet import ResNet, ResNetV1c, ResNetV1d
+from .resnext import ResNeXt
+
+__all__ = ['ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet']
diff --git a/mmseg/models/backbones/hrnet.py b/mmseg/models/backbones/hrnet.py
new file mode 100644
index 0000000000..e4247ba67e
--- /dev/null
+++ b/mmseg/models/backbones/hrnet.py
@@ -0,0 +1,555 @@
+import torch.nn as nn
+from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,
+ kaiming_init)
+from mmcv.runner import load_checkpoint
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmseg.ops import resize
+from mmseg.utils import get_root_logger
+from ..builder import BACKBONES
+from .resnet import BasicBlock, Bottleneck
+
+
+class HRModule(nn.Module):
+ """High-Resolution Module for HRNet.
+
+ In this module, branch ``i`` has ``num_blocks[i]`` BasicBlocks/Bottlenecks.
+ Fusion/Exchange across branches is in this module.
+ """
+
+ def __init__(self,
+ num_branches,
+ blocks,
+ num_blocks,
+ in_channels,
+ num_channels,
+ multiscale_output=True,
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True)):
+ super(HRModule, self).__init__()
+ self._check_branches(num_branches, num_blocks, in_channels,
+ num_channels)
+
+ self.in_channels = in_channels
+ self.num_branches = num_branches
+
+ self.multiscale_output = multiscale_output
+ self.norm_cfg = norm_cfg
+ self.conv_cfg = conv_cfg
+ self.with_cp = with_cp
+ self.branches = self._make_branches(num_branches, blocks, num_blocks,
+ num_channels)
+ self.fuse_layers = self._make_fuse_layers()
+ self.relu = nn.ReLU(inplace=False)
+
+ def _check_branches(self, num_branches, num_blocks, in_channels,
+ num_channels):
+ """Check each per-branch list has ``num_branches`` entries."""
+ if num_branches != len(num_blocks):
+ error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' \
+ f'{len(num_blocks)})'
+ raise ValueError(error_msg)
+
+ if num_branches != len(num_channels):
+ error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' \
+ f'{len(num_channels)})'
+ raise ValueError(error_msg)
+
+ if num_branches != len(in_channels):
+ error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' \
+ f'{len(in_channels)})'
+ raise ValueError(error_msg)
+
+ def _make_one_branch(self,
+ branch_index,
+ block,
+ num_blocks,
+ num_channels,
+ stride=1):
+ """Build one branch as a ``nn.Sequential`` of ``block`` instances."""
+ downsample = None
+ if stride != 1 or \
+ self.in_channels[branch_index] != \
+ num_channels[branch_index] * block.expansion:
+ downsample = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ self.in_channels[branch_index],
+ num_channels[branch_index] * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(self.norm_cfg, num_channels[branch_index] *
+ block.expansion)[1])
+ # NOTE: the caller's ``in_channels`` list is updated in place below
+ layers = []
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index],
+ stride,
+ downsample=downsample,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+ self.in_channels[branch_index] = \
+ num_channels[branch_index] * block.expansion
+ for i in range(1, num_blocks[branch_index]):
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index],
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*layers)
+
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
+ """Build all branches and return them as a ``nn.ModuleList``."""
+ branches = []
+
+ for i in range(num_branches):
+ branches.append(
+ self._make_one_branch(i, block, num_blocks, num_channels))
+
+ return nn.ModuleList(branches)
+
+ def _make_fuse_layers(self):
+ """Build cross-branch fuse layers (``None`` for a single branch)."""
+ if self.num_branches == 1:
+ return None
+
+ num_branches = self.num_branches
+ in_channels = self.in_channels
+ fuse_layers = []
+ num_out_branches = num_branches if self.multiscale_output else 1
+ for i in range(num_out_branches):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg, in_channels[i])[1],
+ # we set align_corners=False for HRNet
+ nn.Upsample(
+ scale_factor=2**(j - i),
+ mode='bilinear',
+ align_corners=False)))
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv_downsamples = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[i])[1]))
+ else:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ nn.ReLU(inplace=False)))
+ fuse_layer.append(nn.Sequential(*conv_downsamples))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def forward(self, x):
+ """Run every branch, then fuse the branch outputs (``x`` is a list)."""
+ if self.num_branches == 1:
+ return [self.branches[0](x[0])]
+ # NOTE: the entries of the input list ``x`` are overwritten in place
+ for i in range(self.num_branches):
+ x[i] = self.branches[i](x[i])
+
+ x_fuse = []
+ for i in range(len(self.fuse_layers)):
+ y = 0
+ for j in range(self.num_branches):
+ if i == j:
+ y += x[j]
+ elif j > i:
+ y = y + resize(
+ self.fuse_layers[i][j](x[j]),
+ size=x[i].shape[2:],
+ mode='bilinear',
+ align_corners=False)
+ else:
+ y += self.fuse_layers[i][j](x[j])
+ x_fuse.append(self.relu(y))
+ return x_fuse
+
+
+@BACKBONES.register_module()
+class HRNet(nn.Module):
+ """HRNet backbone.
+
+ High-Resolution Representations for Labeling Pixels and Regions
+ arXiv: https://arxiv.org/abs/1904.04514
+
+ Args:
+ extra (dict): detailed configuration for each stage of HRNet.
+ in_channels (int): Number of input image channels. Normally 3.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ zero_init_residual (bool): whether to use zero init for last norm layer
+ in resblocks to let them behave as identity.
+
+ Example:
+ >>> from mmseg.models import HRNet
+ >>> import torch
+ >>> extra = dict(
+ >>> stage1=dict(
+ >>> num_modules=1,
+ >>> num_branches=1,
+ >>> block='BOTTLENECK',
+ >>> num_blocks=(4, ),
+ >>> num_channels=(64, )),
+ >>> stage2=dict(
+ >>> num_modules=1,
+ >>> num_branches=2,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4),
+ >>> num_channels=(32, 64)),
+ >>> stage3=dict(
+ >>> num_modules=4,
+ >>> num_branches=3,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4, 4),
+ >>> num_channels=(32, 64, 128)),
+ >>> stage4=dict(
+ >>> num_modules=3,
+ >>> num_branches=4,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4, 4, 4),
+ >>> num_channels=(32, 64, 128, 256)))
+ >>> self = HRNet(extra, in_channels=1)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 1, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 32, 8, 8)
+ (1, 64, 4, 4)
+ (1, 128, 2, 2)
+ (1, 256, 1, 1)
+ """
+
+ blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck}
+
+ def __init__(self,
+ extra,
+ in_channels=3,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=False,
+ with_cp=False,
+ zero_init_residual=False):
+ super(HRNet, self).__init__()
+ self.extra = extra
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+ self.zero_init_residual = zero_init_residual
+
+ # stem net
+ self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ 64,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.relu = nn.ReLU(inplace=True)
+
+ # stage 1
+ self.stage1_cfg = self.extra['stage1']
+ num_channels = self.stage1_cfg['num_channels'][0]
+ block_type = self.stage1_cfg['block']
+ num_blocks = self.stage1_cfg['num_blocks'][0]
+
+ block = self.blocks_dict[block_type]
+ stage1_out_channels = num_channels * block.expansion
+ self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)
+
+ # stage 2
+ self.stage2_cfg = self.extra['stage2']
+ num_channels = self.stage2_cfg['num_channels']
+ block_type = self.stage2_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [channel * block.expansion for channel in num_channels]
+ self.transition1 = self._make_transition_layer([stage1_out_channels],
+ num_channels)
+ self.stage2, pre_stage_channels = self._make_stage(
+ self.stage2_cfg, num_channels)
+
+ # stage 3
+ self.stage3_cfg = self.extra['stage3']
+ num_channels = self.stage3_cfg['num_channels']
+ block_type = self.stage3_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [channel * block.expansion for channel in num_channels]
+ self.transition2 = self._make_transition_layer(pre_stage_channels,
+ num_channels)
+ self.stage3, pre_stage_channels = self._make_stage(
+ self.stage3_cfg, num_channels)
+
+ # stage 4
+ self.stage4_cfg = self.extra['stage4']
+ num_channels = self.stage4_cfg['num_channels']
+ block_type = self.stage4_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [channel * block.expansion for channel in num_channels]
+ self.transition3 = self._make_transition_layer(pre_stage_channels,
+ num_channels)
+ self.stage4, pre_stage_channels = self._make_stage(
+ self.stage4_cfg, num_channels)
+
+ @property
+ def norm1(self):
+ """nn.Module: the normalization layer named "norm1" """
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ """nn.Module: the normalization layer named "norm2" """
+ return getattr(self, self.norm2_name)
+
+ def _make_transition_layer(self, num_channels_pre_layer,
+ num_channels_cur_layer):
+ """Build layers adapting the previous stage's branches to this one."""
+ num_branches_cur = len(num_channels_cur_layer)
+ num_branches_pre = len(num_channels_pre_layer)
+
+ transition_layers = []
+ for i in range(num_branches_cur):
+ if i < num_branches_pre:
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+ transition_layers.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ num_channels_pre_layer[i],
+ num_channels_cur_layer[i],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_channels_cur_layer[i])[1],
+ nn.ReLU(inplace=True)))
+ else:
+ transition_layers.append(None)
+ else:
+ conv_downsamples = []
+ for j in range(i + 1 - num_branches_pre):
+ in_channels = num_channels_pre_layer[-1]
+ out_channels = num_channels_cur_layer[i] \
+ if j == i - num_branches_pre else in_channels
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg, out_channels)[1],
+ nn.ReLU(inplace=True)))
+ transition_layers.append(nn.Sequential(*conv_downsamples))
+
+ return nn.ModuleList(transition_layers)
+
+ def _make_layer(self, block, inplanes, planes, blocks, stride=1):
+ """Stack ``blocks`` instances of ``block`` into a ``nn.Sequential``."""
+ downsample = None
+ if stride != 1 or inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ inplanes,
+ planes * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+ layers = []
+ layers.append(
+ block(
+ inplanes,
+ planes,
+ stride,
+ downsample=downsample,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+ inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(
+ block(
+ inplanes,
+ planes,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*layers)
+
+ def _make_stage(self, layer_config, in_channels, multiscale_output=True):
+ """Build one stage as ``num_modules`` chained :class:`HRModule`."""
+ num_modules = layer_config['num_modules']
+ num_branches = layer_config['num_branches']
+ num_blocks = layer_config['num_blocks']
+ num_channels = layer_config['num_channels']
+ block = self.blocks_dict[layer_config['block']]
+
+ hr_modules = []
+ for i in range(num_modules):
+ # ``multiscale_output=False`` only affects the last module
+ if not multiscale_output and i == num_modules - 1:
+ reset_multiscale_output = False
+ else:
+ reset_multiscale_output = True
+
+ hr_modules.append(
+ HRModule(
+ num_branches,
+ block,
+ num_blocks,
+ in_channels,
+ num_channels,
+ reset_multiscale_output,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*hr_modules), in_channels
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+
+ if self.zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ constant_init(m.norm3, 0)
+ elif isinstance(m, BasicBlock):
+ constant_init(m.norm2, 0)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ """Run the stem and four stages; returns the stage-4 branch outputs."""
+
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.conv2(x)
+ x = self.norm2(x)
+ x = self.relu(x)
+ x = self.layer1(x)
+
+ x_list = []
+ for i in range(self.stage2_cfg['num_branches']):
+ if self.transition1[i] is not None:
+ x_list.append(self.transition1[i](x))
+ else:
+ x_list.append(x)
+ y_list = self.stage2(x_list)
+ # NOTE(review): transitions always read ``y_list[-1]``; confirm intent
+ x_list = []
+ for i in range(self.stage3_cfg['num_branches']):
+ if self.transition2[i] is not None:
+ x_list.append(self.transition2[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage3(x_list)
+
+ x_list = []
+ for i in range(self.stage4_cfg['num_branches']):
+ if self.transition3[i] is not None:
+ x_list.append(self.transition3[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage4(x_list)
+
+ return y_list
+
+ def train(self, mode=True):
+ """Convert the model into training mode while keeping the normalization
+ layers in eval mode when ``norm_eval`` is set."""
+ super(HRNet, self).train(mode)
+ if mode and self.norm_eval:
+ for m in self.modules():
+ # trick: eval() only has an effect on BatchNorm layers
+ if isinstance(m, _BatchNorm):
+ m.eval()
diff --git a/mmseg/models/backbones/resnet.py b/mmseg/models/backbones/resnet.py
new file mode 100644
index 0000000000..4e90c67778
--- /dev/null
+++ b/mmseg/models/backbones/resnet.py
@@ -0,0 +1,689 @@
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer,
+ constant_init, kaiming_init)
+from mmcv.runner import load_checkpoint
+from mmcv.utils.parrots_wrapper import _BatchNorm
+
+from mmseg.utils import get_root_logger
+from ..builder import BACKBONES
+from ..utils import ResLayer
+
+
+class BasicBlock(nn.Module):
+    """Residual block made of two 3x3 convolutions.
+
+    Args:
+        inplanes (int): Input channels of the first conv.
+        planes (int): Output channels of both convs.
+        stride (int): Stride of the first conv. Default: 1.
+        dilation (int): Dilation and padding of the first conv. Default: 1.
+        downsample (nn.Module, optional): Applied to the block input to
+            produce the identity branch. Default: None.
+        style (str): Accepted for interface parity with ``Bottleneck``; not
+            read by this block. Default: 'pytorch'.
+        with_cp (bool): Run the residual branch through
+            ``torch.utils.checkpoint`` when the input requires grad.
+            Default: False.
+        conv_cfg (dict, optional): Config passed to ``build_conv_layer``.
+        norm_cfg (dict): Config passed to ``build_norm_layer``.
+        dcn: Must be None, DCN is not implemented for this block.
+        plugins: Must be None, plugins are not implemented for this block.
+    """
+
+    expansion = 1
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 dilation=1,
+                 downsample=None,
+                 style='pytorch',
+                 with_cp=False,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 dcn=None,
+                 plugins=None):
+        super().__init__()
+        assert dcn is None, 'Not implemented yet.'
+        assert plugins is None, 'Not implemented yet.'
+
+        # Layers are built and registered in the same order as before so
+        # that module ordering and parameter initialisation are unchanged.
+        self.norm1_name, first_norm = build_norm_layer(
+            norm_cfg, planes, postfix=1)
+        self.norm2_name, second_norm = build_norm_layer(
+            norm_cfg, planes, postfix=2)
+
+        self.conv1 = build_conv_layer(
+            conv_cfg,
+            inplanes,
+            planes,
+            3,
+            stride=stride,
+            padding=dilation,
+            dilation=dilation,
+            bias=False)
+        self.add_module(self.norm1_name, first_norm)
+
+        self.conv2 = build_conv_layer(
+            conv_cfg, planes, planes, 3, padding=1, bias=False)
+        self.add_module(self.norm2_name, second_norm)
+
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+
+        # Plain bookkeeping attributes.
+        self.stride, self.dilation = stride, dilation
+        self.with_cp = with_cp
+
+    @property
+    def norm1(self):
+        """nn.Module: normalization layer that follows ``conv1``."""
+        return getattr(self, self.norm1_name)
+
+    @property
+    def norm2(self):
+        """nn.Module: normalization layer that follows ``conv2``."""
+        return getattr(self, self.norm2_name)
+
+    def forward(self, x):
+        """Apply the residual branch, add the shortcut and activate."""
+
+        def _residual(inp):
+            out = self.relu(self.norm1(self.conv1(inp)))
+            out = self.norm2(self.conv2(out))
+
+            # The shortcut is computed after the main path, as before.
+            shortcut = inp
+            if self.downsample is not None:
+                shortcut = self.downsample(inp)
+
+            out += shortcut
+            return out
+
+        use_checkpoint = self.with_cp and x.requires_grad
+        if use_checkpoint:
+            summed = cp.checkpoint(_residual, x)
+        else:
+            summed = _residual(x)
+
+        return self.relu(summed)
+
+
+class Bottleneck(nn.Module):
+    """Bottleneck block for ResNet.
+
+    If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is
+    "caffe", the stride-two layer is the first 1x1 conv layer.
+
+    Args:
+        inplanes (int): Input channels of the first 1x1 conv.
+        planes (int): Channels of the first 1x1 conv and of the 3x3 conv.
+            The block outputs ``planes * expansion`` channels.
+        stride (int): Stride placed on conv1 ("caffe") or conv2 ("pytorch").
+            Default: 1.
+        dilation (int): Dilation and padding of the 3x3 conv. Default: 1.
+        downsample (nn.Module, optional): Applied to the block input to
+            produce the identity branch. Default: None.
+        style (str): "pytorch" or "caffe". Default: "pytorch".
+        with_cp (bool): Run the residual branch through
+            ``torch.utils.checkpoint`` when the input requires grad.
+            Default: False.
+        conv_cfg (dict, optional): Config passed to ``build_conv_layer``.
+            Must be None when ``dcn`` is used for conv2.
+        norm_cfg (dict): Config passed to ``build_norm_layer``.
+        dcn (dict, optional): Config used instead of ``conv_cfg`` to build
+            conv2. The optional key ``fallback_on_stride`` is popped from
+            it; when truthy, conv2 is built from ``conv_cfg`` instead.
+        plugins (list[dict], optional): Each dict provides ``cfg`` and
+            ``position`` ('after_conv1', 'after_conv2' or 'after_conv3').
+    """
+
+    expansion = 4
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 dilation=1,
+                 downsample=None,
+                 style='pytorch',
+                 with_cp=False,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 dcn=None,
+                 plugins=None):
+        super(Bottleneck, self).__init__()
+        assert style in ['pytorch', 'caffe']
+        assert dcn is None or isinstance(dcn, dict)
+        assert plugins is None or isinstance(plugins, list)
+        if plugins is not None:
+            allowed_position = ['after_conv1', 'after_conv2', 'after_conv3']
+            assert all(p['position'] in allowed_position for p in plugins)
+
+        self.inplanes = inplanes
+        self.planes = planes
+        self.stride = stride
+        self.dilation = dilation
+        self.style = style
+        self.with_cp = with_cp
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.dcn = dcn
+        self.with_dcn = dcn is not None
+        self.plugins = plugins
+        self.with_plugins = plugins is not None
+
+        if self.with_plugins:
+            # collect plugins for conv1/conv2/conv3
+            self.after_conv1_plugins = [
+                plugin['cfg'] for plugin in plugins
+                if plugin['position'] == 'after_conv1'
+            ]
+            self.after_conv2_plugins = [
+                plugin['cfg'] for plugin in plugins
+                if plugin['position'] == 'after_conv2'
+            ]
+            self.after_conv3_plugins = [
+                plugin['cfg'] for plugin in plugins
+                if plugin['position'] == 'after_conv3'
+            ]
+
+        # The style decides which conv carries the block stride.
+        if self.style == 'pytorch':
+            self.conv1_stride = 1
+            self.conv2_stride = stride
+        else:
+            self.conv1_stride = stride
+            self.conv2_stride = 1
+
+        self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
+        self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
+        self.norm3_name, norm3 = build_norm_layer(
+            norm_cfg, planes * self.expansion, postfix=3)
+
+        self.conv1 = build_conv_layer(
+            conv_cfg,
+            inplanes,
+            planes,
+            kernel_size=1,
+            stride=self.conv1_stride,
+            bias=False)
+        self.add_module(self.norm1_name, norm1)
+        fallback_on_stride = False
+        if self.with_dcn:
+            # NOTE(review): pop() mutates the dict object passed in; if the
+            # caller shares one dict between several blocks, only the first
+            # block would see 'fallback_on_stride' - confirm against caller.
+            fallback_on_stride = dcn.pop('fallback_on_stride', False)
+        if not self.with_dcn or fallback_on_stride:
+            self.conv2 = build_conv_layer(
+                conv_cfg,
+                planes,
+                planes,
+                kernel_size=3,
+                stride=self.conv2_stride,
+                padding=dilation,
+                dilation=dilation,
+                bias=False)
+        else:
+            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
+            self.conv2 = build_conv_layer(
+                dcn,
+                planes,
+                planes,
+                kernel_size=3,
+                stride=self.conv2_stride,
+                padding=dilation,
+                dilation=dilation,
+                bias=False)
+
+        self.add_module(self.norm2_name, norm2)
+        self.conv3 = build_conv_layer(
+            conv_cfg,
+            planes,
+            planes * self.expansion,
+            kernel_size=1,
+            bias=False)
+        self.add_module(self.norm3_name, norm3)
+
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+
+        if self.with_plugins:
+            self.after_conv1_plugin_names = self.make_block_plugins(
+                planes, self.after_conv1_plugins)
+            self.after_conv2_plugin_names = self.make_block_plugins(
+                planes, self.after_conv2_plugins)
+            self.after_conv3_plugin_names = self.make_block_plugins(
+                planes * self.expansion, self.after_conv3_plugins)
+
+    def make_block_plugins(self, in_channels, plugins):
+        """Build the given plugins and register them on this block.
+
+        Args:
+            in_channels (int): Input channels of plugin.
+            plugins (list[dict]): List of plugins cfg to build.
+
+        Returns:
+            list[str]: List of the names of plugin.
+        """
+        assert isinstance(plugins, list)
+        plugin_names = []
+        for plugin in plugins:
+            # Work on a copy so that popping 'postfix' leaves the caller's
+            # cfg dict untouched.
+            plugin = plugin.copy()
+            name, layer = build_plugin_layer(
+                plugin,
+                in_channels=in_channels,
+                postfix=plugin.pop('postfix', ''))
+            assert not hasattr(self, name), f'duplicate plugin {name}'
+            self.add_module(name, layer)
+            plugin_names.append(name)
+        return plugin_names
+
+    def forward_plugin(self, x, plugin_names):
+        """Apply the named plugins to ``x`` one after another.
+
+        Args:
+            x: Input of the first plugin.
+            plugin_names (list[str]): Attribute names of the plugins, in
+                application order.
+
+        Returns:
+            Output of the last plugin, or ``x`` itself when
+            ``plugin_names`` is empty.
+        """
+        out = x
+        for name in plugin_names:
+            # Feed the previous plugin's output rather than the original
+            # ``x``; otherwise every plugin but the last is discarded.
+            out = getattr(self, name)(out)
+        return out
+
+    @property
+    def norm1(self):
+        """nn.Module: normalization layer after the first convolution layer"""
+        return getattr(self, self.norm1_name)
+
+    @property
+    def norm2(self):
+        """nn.Module: normalization layer after the second convolution layer"""
+        return getattr(self, self.norm2_name)
+
+    @property
+    def norm3(self):
+        """nn.Module: normalization layer after the third convolution layer"""
+        return getattr(self, self.norm3_name)
+
+    def forward(self, x):
+        """Forward function.
+
+        Runs conv1/conv2/conv3 with their norms (and plugins, if any), adds
+        the identity branch and applies the final ReLU.
+        """
+
+        def _inner_forward(x):
+            identity = x
+
+            out = self.conv1(x)
+            out = self.norm1(out)
+            out = self.relu(out)
+
+            if self.with_plugins:
+                out = self.forward_plugin(out, self.after_conv1_plugin_names)
+
+            out = self.conv2(out)
+            out = self.norm2(out)
+            out = self.relu(out)
+
+            if self.with_plugins:
+                out = self.forward_plugin(out, self.after_conv2_plugin_names)
+
+            out = self.conv3(out)
+            out = self.norm3(out)
+
+            if self.with_plugins:
+                out = self.forward_plugin(out, self.after_conv3_plugin_names)
+
+            if self.downsample is not None:
+                identity = self.downsample(x)
+
+            out += identity
+
+            return out
+
+        # Checkpointing is only used when gradients flow through the input.
+        if self.with_cp and x.requires_grad:
+            out = cp.checkpoint(_inner_forward, x)
+        else:
+            out = _inner_forward(x)
+
+        out = self.relu(out)
+
+        return out
+
+
+@BACKBONES.register_module()
+class ResNet(nn.Module):
+    """ResNet backbone.
+
+    Args:
+        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
+        in_channels (int): Number of input image channels. Default: 3.
+        stem_channels (int): Number of stem channels. Default: 64.
+        base_channels (int): Number of base channels of res layer. Default: 64.
+        num_stages (int): Resnet stages, normally 4.
+        strides (Sequence[int]): Strides of the first block of each stage.
+        dilations (Sequence[int]): Dilation of each stage.
+        out_indices (Sequence[int]): Output from which stages.
+        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+            layer is the 3x3 conv layer, otherwise the stride-two layer is
+            the first 1x1 conv layer.
+        deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv
+        avg_down (bool): Use AvgPool instead of stride conv when
+            downsampling in the bottleneck.
+        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+            -1 means not freezing any parameters.
+        conv_cfg (dict, optional): Config passed to ``build_conv_layer`` for
+            the stem and forwarded to every res layer. Default: None.
+        norm_cfg (dict): Dictionary to construct and config norm layer.
+        norm_eval (bool): Whether to set norm layers to eval mode, namely,
+            freeze running stats (mean and var). Note: Effect on Batch Norm
+            and its variants only.
+        dcn (dict, optional): DCN config forwarded to the stages selected by
+            ``stage_with_dcn``. Default: None.
+        stage_with_dcn (Sequence[bool]): Per-stage flag selecting whether
+            ``dcn`` is forwarded to that stage. Its length must equal
+            ``num_stages`` when ``dcn`` is given.
+        plugins (list[dict]): List of plugins for stages, each dict contains:
+            cfg (dict, required): Cfg dict to build plugin.
+            position (str, required): Position inside block to insert plugin,
+            options: 'after_conv1', 'after_conv2', 'after_conv3'.
+            stages (tuple[bool], optional): Stages to apply plugin, length
+            should be same as 'num_stages'
+        multi_grid (Sequence[int]|None): Multi grid dilation rates of last
+            stage. Default: None
+        contract_dilation (bool): Whether contract first dilation of each layer
+            Default: False
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed.
+        zero_init_residual (bool): Whether to use zero init for last norm layer
+            in resblocks to let them behave as identity.
+
+    Example:
+        >>> from mmseg.models import ResNet
+        >>> import torch
+        >>> self = ResNet(depth=18)
+        >>> self.eval()
+        >>> inputs = torch.rand(1, 3, 32, 32)
+        >>> level_outputs = self.forward(inputs)
+        >>> for level_out in level_outputs:
+        ...     print(tuple(level_out.shape))
+        (1, 64, 8, 8)
+        (1, 128, 4, 4)
+        (1, 256, 2, 2)
+        (1, 512, 1, 1)
+    """
+
+    # depth -> (block class, number of blocks per stage)
+    arch_settings = {
+        18: (BasicBlock, (2, 2, 2, 2)),
+        34: (BasicBlock, (3, 4, 6, 3)),
+        50: (Bottleneck, (3, 4, 6, 3)),
+        101: (Bottleneck, (3, 4, 23, 3)),
+        152: (Bottleneck, (3, 8, 36, 3))
+    }
+
+    def __init__(self,
+                 depth,
+                 in_channels=3,
+                 stem_channels=64,
+                 base_channels=64,
+                 num_stages=4,
+                 strides=(1, 2, 2, 2),
+                 dilations=(1, 1, 1, 1),
+                 out_indices=(0, 1, 2, 3),
+                 style='pytorch',
+                 deep_stem=False,
+                 avg_down=False,
+                 frozen_stages=-1,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN', requires_grad=True),
+                 norm_eval=False,
+                 dcn=None,
+                 stage_with_dcn=(False, False, False, False),
+                 plugins=None,
+                 multi_grid=None,
+                 contract_dilation=False,
+                 with_cp=False,
+                 zero_init_residual=True):
+        super(ResNet, self).__init__()
+        if depth not in self.arch_settings:
+            raise KeyError(f'invalid depth {depth} for resnet')
+        self.depth = depth
+        self.stem_channels = stem_channels
+        self.base_channels = base_channels
+        self.num_stages = num_stages
+        assert num_stages >= 1 and num_stages <= 4
+        self.strides = strides
+        self.dilations = dilations
+        assert len(strides) == len(dilations) == num_stages
+        self.out_indices = out_indices
+        assert max(out_indices) < num_stages
+        self.style = style
+        self.deep_stem = deep_stem
+        self.avg_down = avg_down
+        self.frozen_stages = frozen_stages
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.with_cp = with_cp
+        self.norm_eval = norm_eval
+        self.dcn = dcn
+        self.stage_with_dcn = stage_with_dcn
+        if dcn is not None:
+            assert len(stage_with_dcn) == num_stages
+        self.plugins = plugins
+        self.multi_grid = multi_grid
+        self.contract_dilation = contract_dilation
+        self.zero_init_residual = zero_init_residual
+        self.block, stage_blocks = self.arch_settings[depth]
+        self.stage_blocks = stage_blocks[:num_stages]
+        # Running input width of the next stage; starts at the stem output.
+        self.inplanes = stem_channels
+
+        self._make_stem_layer(in_channels, stem_channels)
+
+        # Names of the registered stages ('layer1', 'layer2', ...).
+        self.res_layers = []
+        for i, num_blocks in enumerate(self.stage_blocks):
+            stride = strides[i]
+            dilation = dilations[i]
+            dcn = self.dcn if self.stage_with_dcn[i] else None
+            if plugins is not None:
+                stage_plugins = self.make_stage_plugins(plugins, i)
+            else:
+                stage_plugins = None
+            # multi grid is applied to last layer only
+            stage_multi_grid = multi_grid if i == len(
+                self.stage_blocks) - 1 else None
+            planes = base_channels * 2**i
+            res_layer = self.make_res_layer(
+                block=self.block,
+                inplanes=self.inplanes,
+                planes=planes,
+                num_blocks=num_blocks,
+                stride=stride,
+                dilation=dilation,
+                style=self.style,
+                avg_down=self.avg_down,
+                with_cp=with_cp,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                dcn=dcn,
+                plugins=stage_plugins,
+                multi_grid=stage_multi_grid,
+                contract_dilation=contract_dilation)
+            self.inplanes = planes * self.block.expansion
+            layer_name = f'layer{i+1}'
+            self.add_module(layer_name, res_layer)
+            self.res_layers.append(layer_name)
+
+        self._freeze_stages()
+
+        # Channel count produced by the last stage.
+        self.feat_dim = self.block.expansion * base_channels * 2**(
+            len(self.stage_blocks) - 1)
+
+    def make_stage_plugins(self, plugins, stage_idx):
+        """make plugins for ResNet 'stage_idx'th stage .
+
+        Currently we support to insert 'context_block',
+        'empirical_attention_block', 'nonlocal_block' into the backbone like
+        ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of
+        Bottleneck.
+
+        An example of plugins format could be :
+        >>> plugins=[
+        ...     dict(cfg=dict(type='xxx', arg1='xxx'),
+        ...          stages=(False, True, True, True),
+        ...          position='after_conv2'),
+        ...     dict(cfg=dict(type='yyy'),
+        ...          stages=(True, True, True, True),
+        ...          position='after_conv3'),
+        ...     dict(cfg=dict(type='zzz', postfix='1'),
+        ...          stages=(True, True, True, True),
+        ...          position='after_conv3'),
+        ...     dict(cfg=dict(type='zzz', postfix='2'),
+        ...          stages=(True, True, True, True),
+        ...          position='after_conv3')
+        ... ]
+        >>> self = ResNet(depth=18)
+        >>> stage_plugins = self.make_stage_plugins(plugins, 0)
+        >>> assert len(stage_plugins) == 3
+
+        Suppose 'stage_idx=0', the structure of blocks in the stage would be:
+            conv1-> conv2->conv3->yyy->zzz1->zzz2
+        Suppose 'stage_idx=1', the structure of blocks in the stage would be:
+            conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2
+
+        If stages is missing, the plugin would be applied to all stages.
+
+        Args:
+            plugins (list[dict]): List of plugins cfg to build. The postfix is
+                required if multiple same type plugins are inserted.
+            stage_idx (int): Index of stage to build
+
+        Returns:
+            list[dict]: Plugins for current stage
+        """
+        stage_plugins = []
+        for plugin in plugins:
+            # Copy first so that popping 'stages' does not alter the
+            # caller's plugin dict.
+            plugin = plugin.copy()
+            stages = plugin.pop('stages', None)
+            assert stages is None or len(stages) == self.num_stages
+            # whether to insert plugin into current stage
+            if stages is None or stages[stage_idx]:
+                stage_plugins.append(plugin)
+
+        return stage_plugins
+
+    def make_res_layer(self, **kwargs):
+        """Pack all blocks in a stage into a ``ResLayer``."""
+        return ResLayer(**kwargs)
+
+    @property
+    def norm1(self):
+        """nn.Module: the normalization layer named "norm1" """
+        return getattr(self, self.norm1_name)
+
+    def _make_stem_layer(self, in_channels, stem_channels):
+        """Make stem layer for ResNet.
+
+        With ``deep_stem`` the stem is ``self.stem`` (three 3x3 convs, each
+        followed by a norm layer and ReLU); otherwise it is a single 7x7
+        conv (``self.conv1``) with ``norm1`` and ``self.relu``. A max-pool
+        is created in both cases.
+        """
+        if self.deep_stem:
+            self.stem = nn.Sequential(
+                build_conv_layer(
+                    self.conv_cfg,
+                    in_channels,
+                    stem_channels // 2,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, stem_channels // 2)[1],
+                nn.ReLU(inplace=True),
+                build_conv_layer(
+                    self.conv_cfg,
+                    stem_channels // 2,
+                    stem_channels // 2,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, stem_channels // 2)[1],
+                nn.ReLU(inplace=True),
+                build_conv_layer(
+                    self.conv_cfg,
+                    stem_channels // 2,
+                    stem_channels,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    bias=False),
+                build_norm_layer(self.norm_cfg, stem_channels)[1],
+                nn.ReLU(inplace=True))
+        else:
+            self.conv1 = build_conv_layer(
+                self.conv_cfg,
+                in_channels,
+                stem_channels,
+                kernel_size=7,
+                stride=2,
+                padding=3,
+                bias=False)
+            self.norm1_name, norm1 = build_norm_layer(
+                self.norm_cfg, stem_channels, postfix=1)
+            self.add_module(self.norm1_name, norm1)
+            self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+    def _freeze_stages(self):
+        """Freeze stages param and norm stats.
+
+        ``frozen_stages >= 0`` freezes the stem; stages ``1`` to
+        ``frozen_stages`` are additionally put in eval mode with their
+        parameters excluded from gradient computation.
+        """
+        if self.frozen_stages >= 0:
+            if self.deep_stem:
+                self.stem.eval()
+                for param in self.stem.parameters():
+                    param.requires_grad = False
+            else:
+                self.norm1.eval()
+                for m in [self.conv1, self.norm1]:
+                    for param in m.parameters():
+                        param.requires_grad = False
+
+        for i in range(1, self.frozen_stages + 1):
+            m = getattr(self, f'layer{i}')
+            m.eval()
+            for param in m.parameters():
+                param.requires_grad = False
+
+    def init_weights(self, pretrained=None):
+        """Initialize the weights in backbone.
+
+        Args:
+            pretrained (str, optional): Path to pre-trained weights.
+                Defaults to None.
+
+        Raises:
+            TypeError: If ``pretrained`` is neither a str nor None.
+        """
+        if isinstance(pretrained, str):
+            logger = get_root_logger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, nn.Conv2d):
+                    kaiming_init(m)
+                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+                    constant_init(m, 1)
+
+            if self.dcn is not None:
+                for m in self.modules():
+                    if isinstance(m, Bottleneck) and hasattr(
+                            m, 'conv2_offset'):
+                        constant_init(m.conv2_offset, 0)
+
+            # Zero the last norm of every block so that the residual branch
+            # starts out contributing nothing.
+            if self.zero_init_residual:
+                for m in self.modules():
+                    if isinstance(m, Bottleneck):
+                        constant_init(m.norm3, 0)
+                    elif isinstance(m, BasicBlock):
+                        constant_init(m.norm2, 0)
+        else:
+            raise TypeError('pretrained must be a str or None')
+
+    def forward(self, x):
+        """Forward function.
+
+        Returns:
+            tuple: Outputs of the stages listed in ``out_indices``.
+        """
+        if self.deep_stem:
+            x = self.stem(x)
+        else:
+            x = self.conv1(x)
+            x = self.norm1(x)
+            x = self.relu(x)
+        x = self.maxpool(x)
+        outs = []
+        for i, layer_name in enumerate(self.res_layers):
+            res_layer = getattr(self, layer_name)
+            x = res_layer(x)
+            if i in self.out_indices:
+                outs.append(x)
+        return tuple(outs)
+
+    def train(self, mode=True):
+        """Switch between training and evaluation mode while keeping the
+        frozen stages (and, optionally, the normalization layers) frozen.
+
+        Args:
+            mode (bool): ``True`` selects training mode, ``False`` evaluation
+                mode. Default: True.
+
+        Returns:
+            ResNet: ``self``, so that the override keeps the chaining
+            contract of ``torch.nn.Module.train``.
+        """
+        super(ResNet, self).train(mode)
+        # The parent call resets every submodule's mode, so re-apply the
+        # freeze afterwards.
+        self._freeze_stages()
+        if mode and self.norm_eval:
+            for m in self.modules():
+                # trick: eval have effect on BatchNorm only
+                if isinstance(m, _BatchNorm):
+                    m.eval()
+        return self
+
+
+@BACKBONES.register_module()
+class ResNetV1c(ResNet):
+    """ResNetV1c variant described in [1]_.
+
+    Differs from the default ResNet (ResNetV1b) only in the input stem: the
+    single 7x7 conv is replaced by three 3x3 convs (``deep_stem=True``).
+    ``avg_down`` stays disabled.
+
+    References:
+        .. [1] https://arxiv.org/pdf/1812.01187.pdf
+    """
+
+    def __init__(self, **kwargs):
+        # ``deep_stem`` and ``avg_down`` are fixed by this variant; all
+        # other options are forwarded to ``ResNet`` unchanged.
+        super().__init__(deep_stem=True, avg_down=False, **kwargs)
+
+
+@BACKBONES.register_module()
+class ResNetV1d(ResNet):
+    """ResNetV1d variant described in [1]_.
+
+    Differs from the default ResNet (ResNetV1b) in two ways: the 7x7 conv of
+    the input stem is replaced by three 3x3 convs (``deep_stem=True``), and
+    in the downsampling block a 2x2 avg_pool with stride 2 is added before
+    the conv, whose stride is changed to 1 (``avg_down=True``).
+
+    References:
+        .. [1] https://arxiv.org/pdf/1812.01187.pdf
+    """
+
+    def __init__(self, **kwargs):
+        # ``deep_stem`` and ``avg_down`` are fixed by this variant; all
+        # other options are forwarded to ``ResNet`` unchanged.
+        super().__init__(deep_stem=True, avg_down=True, **kwargs)
diff --git a/mmseg/models/backbones/resnext.py b/mmseg/models/backbones/resnext.py
new file mode 100644
index 0000000000..fa8149ce2f
--- /dev/null
+++ b/mmseg/models/backbones/resnext.py
@@ -0,0 +1,145 @@
+import math
+
+from mmcv.cnn import build_conv_layer, build_norm_layer
+
+from ..builder import BACKBONES
+from ..utils import ResLayer
+from .resnet import Bottleneck as _Bottleneck
+from .resnet import ResNet
+
+
+class Bottleneck(_Bottleneck):
+    """Bottleneck block for ResNeXt.
+
+    The parent ResNet bottleneck is constructed first; its three convs and
+    norms are then rebuilt with the grouped ``width``.
+
+    If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is
+    "caffe", the stride-two layer is the first 1x1 conv layer.
+
+    Args:
+        inplanes (int): Input channels of the first 1x1 conv.
+        planes (int): Base channels; the block outputs
+            ``planes * expansion`` channels.
+        groups (int): Groups of the 3x3 conv. Default: 1.
+        base_width (int): Width per group, relative to ``base_channels``.
+            Default: 4.
+        base_channels (int): Reference channel count used to scale
+            ``base_width``. Default: 64.
+        **kwargs: Forwarded to the parent bottleneck.
+    """
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 groups=1,
+                 base_width=4,
+                 base_channels=64,
+                 **kwargs):
+        super().__init__(inplanes, planes, **kwargs)
+
+        # Channels of the 1x1 -> 3x3 -> 1x1 bottleneck interior.
+        width = self.planes
+        if groups != 1:
+            width = math.floor(self.planes *
+                               (base_width / base_channels)) * groups
+        out_planes = self.planes * self.expansion
+
+        # Layers are rebuilt in the original order so that module ordering
+        # and parameter initialisation are unchanged.
+        self.norm1_name, first_norm = build_norm_layer(
+            self.norm_cfg, width, postfix=1)
+        self.norm2_name, second_norm = build_norm_layer(
+            self.norm_cfg, width, postfix=2)
+        self.norm3_name, third_norm = build_norm_layer(
+            self.norm_cfg, out_planes, postfix=3)
+
+        self.conv1 = build_conv_layer(
+            self.conv_cfg,
+            self.inplanes,
+            width,
+            kernel_size=1,
+            stride=self.conv1_stride,
+            bias=False)
+        self.add_module(self.norm1_name, first_norm)
+
+        self.with_modulated_dcn = False
+        fallback_on_stride = (
+            self.dcn.pop('fallback_on_stride', False)
+            if self.with_dcn else False)
+
+        # conv2 is built from ``conv_cfg`` unless DCN is active without
+        # fallback, in which case the DCN config is used instead.
+        plain_conv = not self.with_dcn or fallback_on_stride
+        if not plain_conv:
+            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
+        conv2_cfg = self.conv_cfg if plain_conv else self.dcn
+        self.conv2 = build_conv_layer(
+            conv2_cfg,
+            width,
+            width,
+            kernel_size=3,
+            stride=self.conv2_stride,
+            padding=self.dilation,
+            dilation=self.dilation,
+            groups=groups,
+            bias=False)
+        self.add_module(self.norm2_name, second_norm)
+
+        self.conv3 = build_conv_layer(
+            self.conv_cfg, width, out_planes, kernel_size=1, bias=False)
+        self.add_module(self.norm3_name, third_norm)
+
+
+@BACKBONES.register_module()
+class ResNeXt(ResNet):
+    """ResNeXt backbone.
+
+    Args:
+        depth (int): Depth of the network, from {50, 101, 152}.
+        in_channels (int): Number of input image channels. Normally 3.
+        num_stages (int): Resnet stages, normally 4.
+        groups (int): Group of resnext.
+        base_width (int): Base width of resnext.
+        strides (Sequence[int]): Strides of the first block of each stage.
+        dilations (Sequence[int]): Dilation of each stage.
+        out_indices (Sequence[int]): Output from which stages.
+        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+            layer is the 3x3 conv layer, otherwise the stride-two layer is
+            the first 1x1 conv layer.
+        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+            not freezing any parameters.
+        norm_cfg (dict): Dictionary to construct and config norm layer.
+        norm_eval (bool): Whether to set norm layers to eval mode, namely,
+            freeze running stats (mean and var). Note: Effect on Batch Norm
+            and its variants only.
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed.
+        zero_init_residual (bool): Whether to use zero init for last norm layer
+            in resblocks to let them behave as identity.
+
+    Example:
+        >>> from mmseg.models import ResNeXt
+        >>> import torch
+        >>> self = ResNeXt(depth=50)
+        >>> self.eval()
+        >>> inputs = torch.rand(1, 3, 32, 32)
+        >>> level_outputs = self.forward(inputs)
+        >>> for level_out in level_outputs:
+        ...     print(tuple(level_out.shape))
+        (1, 256, 8, 8)
+        (1, 512, 4, 4)
+        (1, 1024, 2, 2)
+        (1, 2048, 1, 1)
+    """
+
+    # depth -> (block class, number of blocks per stage)
+    arch_settings = {
+        50: (Bottleneck, (3, 4, 6, 3)),
+        101: (Bottleneck, (3, 4, 23, 3)),
+        152: (Bottleneck, (3, 8, 36, 3))
+    }
+
+    def __init__(self, groups=1, base_width=4, **kwargs):
+        # Both attributes must exist before the parent constructor runs,
+        # because it calls ``make_res_layer``, which reads them.
+        self.groups, self.base_width = groups, base_width
+        super().__init__(**kwargs)
+
+    def make_res_layer(self, **kwargs):
+        """Build one stage as a ``ResLayer`` with the ResNeXt settings."""
+        return ResLayer(
+            **kwargs,
+            groups=self.groups,
+            base_width=self.base_width,
+            base_channels=self.base_channels)
diff --git a/mmseg/models/builder.py b/mmseg/models/builder.py
new file mode 100644
index 0000000000..f4b84dd60f
--- /dev/null
+++ b/mmseg/models/builder.py
@@ -0,0 +1,56 @@
+from mmcv.utils import Registry, build_from_cfg
+from torch import nn
+
+BACKBONES = Registry('backbone')
+NECKS = Registry('neck')
+HEADS = Registry('head')
+LOSSES = Registry('loss')
+SEGMENTORS = Registry('segmentor')
+
+
+def build(cfg, registry, default_args=None):
+    """Build a module, or a sequence of modules, from config.
+
+    Args:
+        cfg (dict, list[dict]): The config of modules, it is either a dict
+            or a list of configs.
+        registry (:obj:`Registry`): A registry the module belongs to.
+        default_args (dict, optional): Default arguments to build the module.
+            Defaults to None.
+
+    Returns:
+        nn.Module: The built module. A list config yields an
+        ``nn.Sequential`` holding one module per entry, in order.
+    """
+    if not isinstance(cfg, list):
+        return build_from_cfg(cfg, registry, default_args)
+
+    return nn.Sequential(
+        *(build_from_cfg(item, registry, default_args) for item in cfg))
+
+
+def build_backbone(cfg):
+    """Build a backbone from ``cfg`` using the ``BACKBONES`` registry."""
+    return build(cfg, registry=BACKBONES)
+
+
+def build_neck(cfg):
+    """Build a neck from ``cfg`` using the ``NECKS`` registry."""
+    return build(cfg, registry=NECKS)
+
+
+def build_head(cfg):
+    """Build a head from ``cfg`` using the ``HEADS`` registry."""
+    return build(cfg, registry=HEADS)
+
+
+def build_loss(cfg):
+    """Build a loss from ``cfg`` using the ``LOSSES`` registry."""
+    return build(cfg, registry=LOSSES)
+
+
+def build_segmentor(cfg, train_cfg=None, test_cfg=None):
+    """Build a segmentor from ``cfg`` using the ``SEGMENTORS`` registry.
+
+    ``train_cfg`` and ``test_cfg`` are handed over as default arguments.
+    """
+    extra_args = dict(train_cfg=train_cfg, test_cfg=test_cfg)
+    return build(cfg, SEGMENTORS, extra_args)
diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py
new file mode 100644
index 0000000000..fda4309436
--- /dev/null
+++ b/mmseg/models/decode_heads/__init__.py
@@ -0,0 +1,19 @@
+from .ann_head import ANNHead
+from .aspp_head import ASPPHead
+from .cc_head import CCHead
+from .da_head import DAHead
+from .enc_head import EncHead
+from .fcn_head import FCNHead
+from .gc_head import GCHead
+from .nl_head import NLHead
+from .ocr_head import OCRHead
+from .psa_head import PSAHead
+from .psp_head import PSPHead
+from .sep_aspp_head import DepthwiseSeparableASPPHead
+from .uper_head import UPerHead
+
+__all__ = [
+ 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
+ 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
+ 'EncHead'
+]
diff --git a/mmseg/models/decode_heads/ann_head.py b/mmseg/models/decode_heads/ann_head.py
new file mode 100644
index 0000000000..396c54e150
--- /dev/null
+++ b/mmseg/models/decode_heads/ann_head.py
@@ -0,0 +1,245 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from ..builder import HEADS
+from ..utils import SelfAttentionBlock as _SelfAttentionBlock
+from .decode_head import BaseDecodeHead
+
+
+class PPMConcat(nn.ModuleList):
+    """Pyramid Pooling Module that only concatenates the pooled features.
+
+    Args:
+        pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+            Module.
+    """
+
+    def __init__(self, pool_scales=(1, 3, 6, 8)):
+        pools = [nn.AdaptiveAvgPool2d(scale) for scale in pool_scales]
+        super().__init__(pools)
+
+    def forward(self, feats):
+        """Pool ``feats`` at every scale and concatenate the results.
+
+        Each pooled map is flattened over everything after its first two
+        dimensions; the flattened maps are joined along dimension 2.
+        """
+        lead_dims = feats.shape[:2]
+        flattened = [pool(feats).view(*lead_dims, -1) for pool in self]
+        return torch.cat(flattened, dim=2)
+
+
+class SelfAttentionBlock(_SelfAttentionBlock):
+    """Self-attention block configured the way ANN uses it.
+
+    Keys are downsampled by a ``PPMConcat`` pyramid; queries are max-pooled
+    when ``query_scale`` is larger than 1.
+
+    Args:
+        low_in_channels (int): Input channels of lower level feature,
+            which is the key feature for self-attention.
+        high_in_channels (int): Input channels of higher level feature,
+            which is the query feature for self-attention.
+        channels (int): Output channels of key/query transform.
+        out_channels (int): Output channels.
+        share_key_query (bool): Whether share projection weight between key
+            and query projection.
+        query_scale (int): The scale of query feature map.
+        key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+            Module of key feature.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict|None): Config of activation layers.
+    """
+
+    def __init__(self, low_in_channels, high_in_channels, channels,
+                 out_channels, share_key_query, query_scale, key_pool_scales,
+                 conv_cfg, norm_cfg, act_cfg):
+        key_psp = PPMConcat(key_pool_scales)
+        query_downsample = (
+            nn.MaxPool2d(kernel_size=query_scale)
+            if query_scale > 1 else None)
+
+        super().__init__(
+            key_in_channels=low_in_channels,
+            query_in_channels=high_in_channels,
+            channels=channels,
+            out_channels=out_channels,
+            share_key_query=share_key_query,
+            query_downsample=query_downsample,
+            key_downsample=key_psp,
+            # Fixed ANN settings.
+            key_query_num_convs=1,
+            key_query_norm=True,
+            value_out_num_convs=1,
+            value_out_norm=False,
+            matmul_norm=True,
+            with_out=True,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+
+
+class AFNB(nn.Module):
+    """Asymmetric Fusion Non-local Block (AFNB).
+
+    Args:
+        low_in_channels (int): Input channels of lower level feature,
+            which is the key feature for self-attention.
+        high_in_channels (int): Input channels of higher level feature,
+            which is the query feature for self-attention.
+        channels (int): Output channels of key/query transform.
+        out_channels (int): Output channels.
+        query_scales (tuple[int]): The scales of query feature map.
+        key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+            Module of key feature.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict|None): Config of activation layers.
+    """
+
+    def __init__(self, low_in_channels, high_in_channels, channels,
+                 out_channels, query_scales, key_pool_scales, conv_cfg,
+                 norm_cfg, act_cfg):
+        super().__init__()
+        # One attention block per query scale; key and query projections
+        # are not shared because the two inputs differ.
+        self.stages = nn.ModuleList([
+            SelfAttentionBlock(
+                low_in_channels=low_in_channels,
+                high_in_channels=high_in_channels,
+                channels=channels,
+                out_channels=out_channels,
+                share_key_query=False,
+                query_scale=scale,
+                key_pool_scales=key_pool_scales,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg) for scale in query_scales
+        ])
+        # 1x1 fusion conv without activation.
+        self.bottleneck = ConvModule(
+            out_channels + high_in_channels,
+            out_channels,
+            1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=None)
+
+    def forward(self, low_feats, high_feats):
+        """Fuse ``high_feats`` (query) with context from ``low_feats``."""
+        priors = []
+        for stage in self.stages:
+            priors.append(stage(high_feats, low_feats))
+        # Sum the per-scale results, then fuse them with the query input.
+        context = torch.stack(priors, dim=0).sum(dim=0)
+        fused = torch.cat([context, high_feats], 1)
+        return self.bottleneck(fused)
+
+
+class APNB(nn.Module):
+    """Asymmetric Pyramid Non-local Block (APNB).
+
+    Args:
+        in_channels (int): Input channels of the feature that serves as
+            both key and query for self-attention.
+        channels (int): Output channels of key/query transform.
+        out_channels (int): Output channels.
+        query_scales (tuple[int]): The scales of query feature map.
+        key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+            Module of key feature.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict|None): Config of activation layers.
+    """
+
+    def __init__(self, in_channels, channels, out_channels, query_scales,
+                 key_pool_scales, conv_cfg, norm_cfg, act_cfg):
+        super().__init__()
+        # One attention block per query scale; key and query share their
+        # projection since both come from the same feature.
+        self.stages = nn.ModuleList([
+            SelfAttentionBlock(
+                low_in_channels=in_channels,
+                high_in_channels=in_channels,
+                channels=channels,
+                out_channels=out_channels,
+                share_key_query=True,
+                query_scale=scale,
+                key_pool_scales=key_pool_scales,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg) for scale in query_scales
+        ])
+        # 1x1 fusion conv.
+        self.bottleneck = ConvModule(
+            2 * in_channels,
+            out_channels,
+            1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+
+    def forward(self, feats):
+        """Attend ``feats`` to itself and fuse the result with ``feats``."""
+        priors = []
+        for stage in self.stages:
+            priors.append(stage(feats, feats))
+        # Sum the per-scale results, then fuse them with the input.
+        context = torch.stack(priors, dim=0).sum(dim=0)
+        fused = torch.cat([context, feats], 1)
+        return self.bottleneck(fused)
+
+
+@HEADS.register_module()
+class ANNHead(BaseDecodeHead):
+    """Asymmetric Non-local Neural Networks for Semantic Segmentation.
+
+    This head is the implementation of `ANNNet
+    <https://arxiv.org/abs/1908.07678>`_.
+
+    Args:
+        project_channels (int): Projection channels for Nonlocal.
+        query_scales (tuple[int]): The scales of query feature map.
+            Default: (1,)
+        key_pool_scales (tuple[int]): The pooling scales of key feature map.
+            Default: (1, 3, 6, 8).
+    """
+
+    def __init__(self,
+                 project_channels,
+                 query_scales=(1, ),
+                 key_pool_scales=(1, 3, 6, 8),
+                 **kwargs):
+        super().__init__(input_transform='multiple_select', **kwargs)
+        # Exactly two input levels are expected: (low, high).
+        assert len(self.in_channels) == 2
+        low_in_channels, high_in_channels = self.in_channels
+        self.project_channels = project_channels
+
+        # Layer configs shared by every sub-module of this head.
+        layer_cfg = dict(
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+        self.fusion = AFNB(
+            low_in_channels=low_in_channels,
+            high_in_channels=high_in_channels,
+            out_channels=high_in_channels,
+            channels=project_channels,
+            query_scales=query_scales,
+            key_pool_scales=key_pool_scales,
+            **layer_cfg)
+        self.bottleneck = ConvModule(
+            high_in_channels, self.channels, 3, padding=1, **layer_cfg)
+        self.context = APNB(
+            in_channels=self.channels,
+            out_channels=self.channels,
+            channels=project_channels,
+            query_scales=query_scales,
+            key_pool_scales=key_pool_scales,
+            **layer_cfg)
+
+    def forward(self, inputs):
+        """Fuse the two input levels, add context and classify."""
+        low_feats, high_feats = self._transform_inputs(inputs)
+        fused = self.dropout(self.fusion(low_feats, high_feats))
+        context = self.context(self.bottleneck(fused))
+        return self.cls_seg(context)
diff --git a/mmseg/models/decode_heads/aspp_head.py b/mmseg/models/decode_heads/aspp_head.py
new file mode 100644
index 0000000000..6332ab120c
--- /dev/null
+++ b/mmseg/models/decode_heads/aspp_head.py
@@ -0,0 +1,107 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from mmseg.ops import resize
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+
+
+class ASPPModule(nn.ModuleList):
+    """Atrous Spatial Pyramid Pooling (ASPP) Module.
+
+    Holds one ``ConvModule`` branch per dilation rate. A rate of 1 yields a
+    1x1 conv without padding; any other rate yields a 3x3 dilated conv whose
+    padding equals the dilation.
+
+    Args:
+        dilations (tuple[int]): Dilation rate of each layer.
+        in_channels (int): Input channels.
+        channels (int): Channels after modules, before conv_seg.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict): Config of activation layers.
+    """
+
+    def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg,
+                 act_cfg):
+        super(ASPPModule, self).__init__()
+        self.dilations = dilations
+        self.in_channels = in_channels
+        self.channels = channels
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        for rate in dilations:
+            if rate == 1:
+                kernel_size, padding = 1, 0
+            else:
+                kernel_size, padding = 3, rate
+            branch = ConvModule(
+                self.in_channels,
+                self.channels,
+                kernel_size,
+                dilation=rate,
+                padding=padding,
+                conv_cfg=self.conv_cfg,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg)
+            self.append(branch)
+
+    def forward(self, x):
+        """Apply every branch to ``x`` and return the outputs as a list."""
+        return [branch(x) for branch in self]
+
+
+@HEADS.register_module()
+class ASPPHead(BaseDecodeHead):
+    """Rethinking Atrous Convolution for Semantic Image Segmentation.
+
+    This head is the implementation of DeepLabV3. A globally pooled branch
+    and one atrous branch per dilation rate are concatenated along the
+    channel axis and merged by a 3x3 bottleneck conv before classification.
+
+    Args:
+        dilations (tuple[int]): Dilation rates for ASPP module.
+            Default: (1, 6, 12, 18).
+    """
+
+    def __init__(self, dilations=(1, 6, 12, 18), **kwargs):
+        super(ASPPHead, self).__init__(**kwargs)
+        assert isinstance(dilations, (list, tuple))
+        self.dilations = dilations
+        pooled_conv = ConvModule(
+            self.in_channels,
+            self.channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.image_pool = nn.Sequential(nn.AdaptiveAvgPool2d(1), pooled_conv)
+        self.aspp_modules = ASPPModule(
+            dilations,
+            self.in_channels,
+            self.channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        # One input slice per atrous branch plus one for the pooled branch.
+        num_branches = len(dilations) + 1
+        self.bottleneck = ConvModule(
+            num_branches * self.channels,
+            self.channels,
+            3,
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        # Pooled branch is 1x1 spatially; bring it back to the size of x.
+        pooled = resize(
+            self.image_pool(x),
+            size=x.size()[2:],
+            mode='bilinear',
+            align_corners=self.align_corners)
+        branches = [pooled]
+        branches.extend(self.aspp_modules(x))
+        fused = self.bottleneck(torch.cat(branches, dim=1))
+        return self.cls_seg(fused)
diff --git a/mmseg/models/decode_heads/cascade_decode_head.py b/mmseg/models/decode_heads/cascade_decode_head.py
new file mode 100644
index 0000000000..d02122ca0e
--- /dev/null
+++ b/mmseg/models/decode_heads/cascade_decode_head.py
@@ -0,0 +1,57 @@
+from abc import ABCMeta, abstractmethod
+
+from .decode_head import BaseDecodeHead
+
+
+class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta):
+    """Base class for cascade decode head used in
+    :class:`CascadeEncoderDecoder`.
+
+    Unlike a plain decode head, ``forward`` additionally receives the output
+    of the previous decode head.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs)
+
+    @abstractmethod
+    def forward(self, inputs, prev_output):
+        """Placeholder of forward function."""
+        pass
+
+    def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg,
+                      train_cfg):
+        """Forward function for training.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+            prev_output (Tensor): The output of previous decode head.
+            img_metas (list[dict]): List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:Collect`.
+            gt_semantic_seg (Tensor): Semantic segmentation masks
+                used if the architecture supports semantic segmentation task.
+            train_cfg (dict): The training config.
+
+        Returns:
+            dict[str, Tensor]: a dictionary of loss components
+        """
+        logits = self.forward(inputs, prev_output)
+        return self.losses(logits, gt_semantic_seg)
+
+    def forward_test(self, inputs, prev_output, img_metas, test_cfg):
+        """Forward function for testing.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+            prev_output (Tensor): The output of previous decode head.
+            img_metas (list[dict]): List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:Collect`.
+            test_cfg (dict): The testing config.
+
+        Returns:
+            Tensor: Output segmentation map.
+        """
+        return self.forward(inputs, prev_output)
diff --git a/mmseg/models/decode_heads/cc_head.py b/mmseg/models/decode_heads/cc_head.py
new file mode 100644
index 0000000000..95c2706a5d
--- /dev/null
+++ b/mmseg/models/decode_heads/cc_head.py
@@ -0,0 +1,42 @@
+import torch
+
+from ..builder import HEADS
+from .fcn_head import FCNHead
+
+try:
+ from mmcv.ops import CrissCrossAttention
+except ModuleNotFoundError:
+ CrissCrossAttention = None
+
+
+@HEADS.register_module()
+class CCHead(FCNHead):
+    """CCNet: Criss-Cross Attention for Semantic Segmentation.
+
+    This head is the implementation of CCNet. The criss-cross attention
+    module is applied ``recurrence`` times between the two convs of the
+    underlying FCN head.
+
+    Args:
+        recurrence (int): Number of recurrence of Criss Cross Attention
+            module. Default: 2.
+
+    Raises:
+        RuntimeError: If ``CrissCrossAttention`` could not be imported.
+    """
+
+    def __init__(self, recurrence=2, **kwargs):
+        # Fail early when the compiled op is unavailable.
+        if CrissCrossAttention is None:
+            raise RuntimeError('Please install mmcv-full for '
+                               'CrissCrossAttention ops')
+        super(CCHead, self).__init__(num_convs=2, **kwargs)
+        self.recurrence = recurrence
+        self.cca = CrissCrossAttention(self.channels)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        first_conv, second_conv = self.convs[0], self.convs[1]
+        feat = first_conv(x)
+        # The same attention module is reused for every recurrence step.
+        for _ in range(self.recurrence):
+            feat = self.cca(feat)
+        feat = second_conv(feat)
+        if self.concat_input:
+            feat = self.conv_cat(torch.cat([x, feat], dim=1))
+        return self.cls_seg(feat)
diff --git a/mmseg/models/decode_heads/da_head.py b/mmseg/models/decode_heads/da_head.py
new file mode 100644
index 0000000000..8ee0e08c3d
--- /dev/null
+++ b/mmseg/models/decode_heads/da_head.py
@@ -0,0 +1,178 @@
+import torch
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule, Scale
+from torch import nn
+
+from mmseg.core import add_prefix
+from ..builder import HEADS
+from ..utils import SelfAttentionBlock as _SelfAttentionBlock
+from .decode_head import BaseDecodeHead
+
+
+class PAM(_SelfAttentionBlock):
+    """Position Attention Module (PAM)
+
+    A self-attention block in which the input serves as both query and key.
+    The attended features are scaled by a learnable factor (initialised to
+    0) and added back to the input.
+
+    Args:
+        in_channels (int): Input channels of key/query feature.
+        channels (int): Output channels of key/query transform.
+    """
+
+    def __init__(self, in_channels, channels):
+        block_cfg = dict(
+            key_in_channels=in_channels,
+            query_in_channels=in_channels,
+            channels=channels,
+            out_channels=in_channels,
+            share_key_query=False,
+            query_downsample=None,
+            key_downsample=None,
+            key_query_num_convs=1,
+            key_query_norm=False,
+            value_out_num_convs=1,
+            value_out_norm=False,
+            matmul_norm=False,
+            with_out=False,
+            conv_cfg=None,
+            norm_cfg=None,
+            act_cfg=None)
+        super(PAM, self).__init__(**block_cfg)
+
+        self.gamma = Scale(0)
+
+    def forward(self, x):
+        """Forward function."""
+        attended = super(PAM, self).forward(x, x)
+        # Residual connection with a learnable scale on the attention path.
+        return self.gamma(attended) + x
+
+
+class CAM(nn.Module):
+    """Channel Attention Module (CAM)
+
+    Computes a channel-by-channel affinity from the flattened input, turns
+    it into attention weights and adds the re-weighted features, scaled by
+    a learnable factor (initialised to 0), back to the input.
+    """
+
+    def __init__(self):
+        super(CAM, self).__init__()
+        self.gamma = Scale(0)
+
+    def forward(self, x):
+        """Forward function."""
+        batch_size, channels, height, width = x.size()
+        # (N, C, H*W): query and value share this flattened view.
+        flat = x.view(batch_size, channels, -1)
+        # (N, C, C) channel affinity.
+        energy = torch.bmm(flat, flat.permute(0, 2, 1))
+        # Subtract from the row-wise maximum before the softmax.
+        row_max = torch.max(energy, -1, keepdim=True)[0]
+        attention = F.softmax(row_max.expand_as(energy) - energy, dim=-1)
+
+        out = torch.bmm(attention, flat)
+        out = out.view(batch_size, channels, height, width)
+
+        return self.gamma(out) + x
+
+
+@HEADS.register_module()
+class DAHead(BaseDecodeHead):
+    """Dual Attention Network for Scene Segmentation.
+
+    This head is the implementation of DANet. It runs a position-attention
+    (PAM) branch and a channel-attention (CAM) branch in parallel; each
+    branch has its own classifier, and a third classifier works on the sum
+    of both branch features.
+
+    Args:
+        pam_channels (int): The channels of Position Attention Module(PAM).
+    """
+
+    def __init__(self, pam_channels, **kwargs):
+        super(DAHead, self).__init__(**kwargs)
+        self.pam_channels = pam_channels
+        # Settings shared by every 3x3 ConvModule of both branches.
+        conv3x3_cfg = dict(
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+        # Position-attention branch.
+        self.pam_in_conv = ConvModule(self.in_channels, self.channels, 3,
+                                      **conv3x3_cfg)
+        self.pam = PAM(self.channels, pam_channels)
+        self.pam_out_conv = ConvModule(self.channels, self.channels, 3,
+                                       **conv3x3_cfg)
+        self.pam_conv_seg = nn.Conv2d(
+            self.channels, self.num_classes, kernel_size=1)
+
+        # Channel-attention branch.
+        self.cam_in_conv = ConvModule(self.in_channels, self.channels, 3,
+                                      **conv3x3_cfg)
+        self.cam = CAM()
+        self.cam_out_conv = ConvModule(self.channels, self.channels, 3,
+                                       **conv3x3_cfg)
+        self.cam_conv_seg = nn.Conv2d(
+            self.channels, self.num_classes, kernel_size=1)
+
+    def pam_cls_seg(self, feat):
+        """PAM feature classification."""
+        if self.dropout is not None:
+            feat = self.dropout(feat)
+        return self.pam_conv_seg(feat)
+
+    def cam_cls_seg(self, feat):
+        """CAM feature classification."""
+        if self.dropout is not None:
+            feat = self.dropout(feat)
+        return self.cam_conv_seg(feat)
+
+    def forward(self, inputs):
+        """Forward function.
+
+        Returns:
+            tuple[Tensor]: ``(pam_cam_out, pam_out, cam_out)`` logits.
+        """
+        x = self._transform_inputs(inputs)
+
+        pam_feat = self.pam_out_conv(self.pam(self.pam_in_conv(x)))
+        pam_out = self.pam_cls_seg(pam_feat)
+
+        cam_feat = self.cam_out_conv(self.cam(self.cam_in_conv(x)))
+        cam_out = self.cam_cls_seg(cam_feat)
+
+        # The fused prediction is made on the sum of both branch features.
+        pam_cam_out = self.cls_seg(pam_feat + cam_feat)
+
+        return pam_cam_out, pam_out, cam_out
+
+    def forward_test(self, inputs, img_metas, test_cfg):
+        """Forward function for testing, only ``pam_cam`` is used."""
+        pam_cam_out = self.forward(inputs)[0]
+        return pam_cam_out
+
+    def losses(self, seg_logit, seg_label):
+        """Compute ``pam_cam``, ``pam``, ``cam`` loss."""
+        loss = dict()
+        # seg_logit is ordered (pam_cam, pam, cam), as returned by forward.
+        for prefix, logit in zip(('pam_cam', 'pam', 'cam'), seg_logit):
+            branch_loss = super(DAHead, self).losses(logit, seg_label)
+            loss.update(add_prefix(branch_loss, prefix))
+        return loss
diff --git a/mmseg/models/decode_heads/decode_head.py b/mmseg/models/decode_heads/decode_head.py
new file mode 100644
index 0000000000..d4c8748722
--- /dev/null
+++ b/mmseg/models/decode_heads/decode_head.py
@@ -0,0 +1,229 @@
+from abc import ABCMeta, abstractmethod
+
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmseg.core import build_pixel_sampler
+from mmseg.ops import resize
+from ..builder import build_loss
+from ..losses import accuracy
+
+
+class BaseDecodeHead(nn.Module, metaclass=ABCMeta):
+    """Base class for decode heads.
+
+    Subclasses implement :meth:`forward`; this class provides input
+    selection, the final 1x1 classification conv with optional dropout, and
+    the loss computation.
+
+    Args:
+        in_channels (int|Sequence[int]): Input channels.
+        channels (int): Channels after modules, before conv_seg.
+        num_classes (int): Number of classes.
+        drop_out_ratio (float): Ratio of dropout layer. Default: 0.1.
+        conv_cfg (dict|None): Config of conv layers. Default: None.
+        norm_cfg (dict|None): Config of norm layers. Default: None.
+        act_cfg (dict): Config of activation layers.
+            Default: dict(type='ReLU')
+        in_index (int|Sequence[int]): Input feature index. Default: -1
+        input_transform (str|None): Transformation type of input features.
+            Options: 'resize_concat', 'multiple_select', None.
+            'resize_concat': Multiple feature maps will be resized to the
+                same size as the first one and then concatenated together.
+                Usually used in FCN head of HRNet.
+            'multiple_select': Multiple feature maps will be bundled into
+                a list and passed into decode head.
+            None: Only one select feature map is allowed.
+            Default: None.
+        loss_decode (dict): Config of decode loss.
+            Default: dict(type='CrossEntropyLoss').
+        ignore_index (int): The label index to be ignored. Default: 255
+        sampler (dict|None): The config of segmentation map sampler.
+            Default: None.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 channels,
+                 *,
+                 num_classes,
+                 drop_out_ratio=0.1,
+                 conv_cfg=None,
+                 norm_cfg=None,
+                 act_cfg=dict(type='ReLU'),
+                 in_index=-1,
+                 input_transform=None,
+                 loss_decode=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 ignore_index=255,
+                 sampler=None,
+                 align_corners=False):
+        super(BaseDecodeHead, self).__init__()
+        # Validates the arguments and sets self.in_channels, self.in_index
+        # and self.input_transform.
+        self._init_inputs(in_channels, in_index, input_transform)
+        self.channels = channels
+        self.num_classes = num_classes
+        self.drop_out_ratio = drop_out_ratio
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        # Already assigned by _init_inputs above; repeated here.
+        self.in_index = in_index
+        self.loss_decode = build_loss(loss_decode)
+        self.ignore_index = ignore_index
+        self.align_corners = align_corners
+        if sampler is not None:
+            self.sampler = build_pixel_sampler(sampler)
+        else:
+            self.sampler = None
+
+        # Final per-pixel classifier.
+        self.conv_seg = nn.Conv2d(channels, num_classes, kernel_size=1)
+        # self.dropout is None when dropout is disabled; users must check.
+        if drop_out_ratio > 0:
+            self.dropout = nn.Dropout2d(drop_out_ratio)
+        else:
+            self.dropout = None
+
+    def extra_repr(self):
+        """Extra repr."""
+        s = f'input_transform={self.input_transform}, ' \
+            f'ignore_index={self.ignore_index}, ' \
+            f'align_corners={self.align_corners}'
+        return s
+
+    def _init_inputs(self, in_channels, in_index, input_transform):
+        """Check and initialize input transforms.
+
+        The in_channels, in_index and input_transform must match.
+        Specifically, when input_transform is None, only single feature map
+        will be selected. So in_channels and in_index must be of type int.
+        When input_transform is not None, in_channels and in_index must be
+        lists or tuples of the same length.
+
+        Args:
+            in_channels (int|Sequence[int]): Input channels.
+            in_index (int|Sequence[int]): Input feature index.
+            input_transform (str|None): Transformation type of input features.
+                Options: 'resize_concat', 'multiple_select', None.
+                'resize_concat': Multiple feature maps will be resized to the
+                    same size as the first one and then concatenated together.
+                    Usually used in FCN head of HRNet.
+                'multiple_select': Multiple feature maps will be bundled into
+                    a list and passed into decode head.
+                None: Only one select feature map is allowed.
+        """
+
+        if input_transform is not None:
+            assert input_transform in ['resize_concat', 'multiple_select']
+        self.input_transform = input_transform
+        self.in_index = in_index
+        if input_transform is not None:
+            assert isinstance(in_channels, (list, tuple))
+            assert isinstance(in_index, (list, tuple))
+            assert len(in_channels) == len(in_index)
+            if input_transform == 'resize_concat':
+                # Concatenation along channels: widths add up.
+                self.in_channels = sum(in_channels)
+            else:
+                self.in_channels = in_channels
+        else:
+            assert isinstance(in_channels, int)
+            assert isinstance(in_index, int)
+            self.in_channels = in_channels
+
+    def init_weights(self):
+        """Initialize weights of classification layer."""
+        normal_init(self.conv_seg, mean=0, std=0.01)
+
+    def _transform_inputs(self, inputs):
+        """Transform inputs for decoder.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+
+        Returns:
+            Tensor|list[Tensor]: The transformed inputs. A single tensor for
+                'resize_concat' and for no transform, a list of tensors for
+                'multiple_select'.
+        """
+
+        if self.input_transform == 'resize_concat':
+            inputs = [inputs[i] for i in self.in_index]
+            # Resize every selected map to the size of the first one.
+            upsampled_inputs = [
+                resize(
+                    input=x,
+                    size=inputs[0].shape[2:],
+                    mode='bilinear',
+                    align_corners=self.align_corners) for x in inputs
+            ]
+            inputs = torch.cat(upsampled_inputs, dim=1)
+        elif self.input_transform == 'multiple_select':
+            inputs = [inputs[i] for i in self.in_index]
+        else:
+            inputs = inputs[self.in_index]
+
+        return inputs
+
+    @abstractmethod
+    def forward(self, inputs):
+        """Placeholder of forward function."""
+        pass
+
+    def forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg):
+        """Forward function for training.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+            img_metas (list[dict]): List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:Collect`.
+            gt_semantic_seg (Tensor): Semantic segmentation masks
+                used if the architecture supports semantic segmentation task.
+            train_cfg (dict): The training config.
+
+        Returns:
+            dict[str, Tensor]: a dictionary of loss components
+        """
+        seg_logits = self.forward(inputs)
+        losses = self.losses(seg_logits, gt_semantic_seg)
+        return losses
+
+    def forward_test(self, inputs, img_metas, test_cfg):
+        """Forward function for testing.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+            img_metas (list[dict]): List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:Collect`.
+            test_cfg (dict): The testing config.
+
+        Returns:
+            Tensor: Output segmentation map.
+        """
+        return self.forward(inputs)
+
+    def cls_seg(self, feat):
+        """Classify each pixel."""
+        if self.dropout is not None:
+            feat = self.dropout(feat)
+        output = self.conv_seg(feat)
+        return output
+
+    def losses(self, seg_logit, seg_label):
+        """Compute segmentation loss.
+
+        Returns:
+            dict: Contains 'loss_seg' and 'acc_seg'.
+        """
+        loss = dict()
+        # Bring the logits to the spatial size of the label map.
+        seg_logit = resize(
+            input=seg_logit,
+            size=seg_label.shape[2:],
+            mode='bilinear',
+            align_corners=self.align_corners)
+        if self.sampler is not None:
+            seg_weight = self.sampler.sample(seg_logit, seg_label)
+        else:
+            seg_weight = None
+        # Drop dim 1 of the label (size 1) before computing the loss.
+        seg_label = seg_label.squeeze(1)
+        loss['loss_seg'] = self.loss_decode(
+            seg_logit,
+            seg_label,
+            weight=seg_weight,
+            ignore_index=self.ignore_index)
+        loss['acc_seg'] = accuracy(seg_logit, seg_label)
+        return loss
diff --git a/mmseg/models/decode_heads/enc_head.py b/mmseg/models/decode_heads/enc_head.py
new file mode 100644
index 0000000000..0c11994cf6
--- /dev/null
+++ b/mmseg/models/decode_heads/enc_head.py
@@ -0,0 +1,187 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule, build_norm_layer
+
+from mmseg.ops import Encoding, resize
+from ..builder import HEADS, build_loss
+from .decode_head import BaseDecodeHead
+
+
+class EncModule(nn.Module):
+    """Encoding Module used in EncNet.
+
+    Projects the input with a 1x1 conv, encodes it against ``num_codes``
+    code words, and uses the averaged encoding to predict one sigmoid gate
+    per channel that re-weights the input.
+
+    Args:
+        in_channels (int): Input channels.
+        num_codes (int): Number of code words.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict): Config of activation layers.
+    """
+
+    def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg):
+        super(EncModule, self).__init__()
+        self.encoding_project = ConvModule(
+            in_channels,
+            in_channels,
+            1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        # TODO: resolve this hack
+        # The norm after Encoding must be the 1d variant of the 2d norm.
+        if norm_cfg is None:
+            # fallback to BN1d
+            encoding_norm_cfg = dict(type='BN1d')
+        else:
+            # Work on a copy so the caller's config is left untouched.
+            encoding_norm_cfg = norm_cfg.copy()
+            norm_type = encoding_norm_cfg['type']
+            if norm_type in ['BN', 'IN']:
+                encoding_norm_cfg['type'] = norm_type + '1d'
+            else:
+                encoding_norm_cfg['type'] = norm_type.replace('2d', '1d')
+        encoding_norm = build_norm_layer(encoding_norm_cfg, num_codes)[1]
+        self.encoding = nn.Sequential(
+            Encoding(channels=in_channels, num_codes=num_codes),
+            encoding_norm,
+            nn.ReLU(inplace=True))
+        self.fc = nn.Sequential(
+            nn.Linear(in_channels, in_channels), nn.Sigmoid())
+
+    def forward(self, x):
+        """Forward function.
+
+        Returns:
+            tuple[Tensor]: ``(encoding_feat, output)`` where ``output`` is
+                the gated input feature map.
+        """
+        projected = self.encoding_project(x)
+        # Average over dim 1 of the encoded features.
+        encoding_feat = self.encoding(projected).mean(dim=1)
+        batch_size, channels, _, _ = x.size()
+        gate = self.fc(encoding_feat).view(batch_size, channels, 1, 1)
+        output = F.relu_(x + x * gate)
+        return encoding_feat, output
+
+
+@HEADS.register_module()
+class EncHead(BaseDecodeHead):
+    """Context Encoding for Semantic Segmentation.
+
+    This head is the implementation of EncNet.
+
+    Args:
+        num_codes (int): Number of code words. Default: 32.
+        use_se_loss (bool): Whether use Semantic Encoding Loss (SE-loss) to
+            regularize the training. Default: True.
+        add_lateral (bool): Whether use lateral connection to fuse features.
+            Default: False.
+        loss_se_decode (dict): Config of decode loss.
+            Default: dict(type='CrossEntropyLoss', use_sigmoid=True).
+    """
+
+    def __init__(self,
+                 num_codes=32,
+                 use_se_loss=True,
+                 add_lateral=False,
+                 loss_se_decode=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=True,
+                     loss_weight=0.2),
+                 **kwargs):
+        super(EncHead, self).__init__(
+            input_transform='multiple_select', **kwargs)
+        self.use_se_loss = use_se_loss
+        self.add_lateral = add_lateral
+        self.num_codes = num_codes
+        # The bottleneck only sees the last selected feature map.
+        self.bottleneck = ConvModule(
+            self.in_channels[-1],
+            self.channels,
+            3,
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        if add_lateral:
+            self.lateral_convs = nn.ModuleList()
+            for in_channels in self.in_channels[:-1]:  # skip the last one
+                self.lateral_convs.append(
+                    ConvModule(
+                        in_channels,
+                        self.channels,
+                        1,
+                        conv_cfg=self.conv_cfg,
+                        norm_cfg=self.norm_cfg,
+                        act_cfg=self.act_cfg))
+            self.fusion = ConvModule(
+                len(self.in_channels) * self.channels,
+                self.channels,
+                3,
+                padding=1,
+                conv_cfg=self.conv_cfg,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg)
+        self.enc_module = EncModule(
+            self.channels,
+            num_codes=num_codes,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        # The SE branch (loss + linear layer) only exists when enabled.
+        if self.use_se_loss:
+            self.loss_se_decode = build_loss(loss_se_decode)
+            self.se_layer = nn.Linear(self.channels, self.num_classes)
+
+    def forward(self, inputs):
+        """Forward function.
+
+        Returns:
+            Tensor|tuple[Tensor]: ``(output, se_output)`` when
+                ``use_se_loss`` is True, otherwise only ``output``.
+        """
+        inputs = self._transform_inputs(inputs)
+        feat = self.bottleneck(inputs[-1])
+        if self.add_lateral:
+            # Resize every lateral feature to the bottleneck resolution.
+            laterals = [
+                resize(
+                    lateral_conv(inputs[i]),
+                    size=feat.shape[2:],
+                    mode='bilinear',
+                    align_corners=self.align_corners)
+                for i, lateral_conv in enumerate(self.lateral_convs)
+            ]
+            feat = self.fusion(torch.cat([feat, *laterals], 1))
+        encode_feat, output = self.enc_module(feat)
+        output = self.cls_seg(output)
+        if self.use_se_loss:
+            se_output = self.se_layer(encode_feat)
+            return output, se_output
+        return output
+
+    def forward_test(self, inputs, img_metas, test_cfg):
+        """Forward function for testing, ignore se_loss."""
+        if self.use_se_loss:
+            return self.forward(inputs)[0]
+        return self.forward(inputs)
+
+    @staticmethod
+    def _convert_to_onehot_labels(seg_label, num_classes):
+        """Convert segmentation label to onehot.
+
+        Args:
+            seg_label (Tensor): Segmentation label of shape (N, H, W).
+            num_classes (int): Number of classes.
+
+        Returns:
+            Tensor: Onehot labels of shape (N, num_classes).
+        """
+
+        batch_size = seg_label.size(0)
+        onehot_labels = seg_label.new_zeros((batch_size, num_classes))
+        for i in range(batch_size):
+            # One histogram bin per class over [0, num_classes - 1]; a class
+            # is marked present when its bin is non-empty.
+            hist = seg_label[i].float().histc(
+                bins=num_classes, min=0, max=num_classes - 1)
+            onehot_labels[i] = hist > 0
+        return onehot_labels
+
+    def losses(self, seg_logit, seg_label):
+        """Compute segmentation and semantic encoding loss.
+
+        Args:
+            seg_logit (Tensor|tuple[Tensor]): The output of :meth:`forward`.
+            seg_label (Tensor): Ground-truth segmentation label.
+
+        Returns:
+            dict: The base segmentation losses, plus 'loss_se' when
+                ``use_se_loss`` is True.
+        """
+        if not self.use_se_loss:
+            # forward() returned a single tensor and no SE loss was built,
+            # so only the base segmentation loss applies.
+            return super(EncHead, self).losses(seg_logit, seg_label)
+        seg_logit, se_seg_logit = seg_logit
+        loss = dict()
+        loss.update(super(EncHead, self).losses(seg_logit, seg_label))
+        se_loss = self.loss_se_decode(
+            se_seg_logit,
+            self._convert_to_onehot_labels(seg_label, self.num_classes))
+        loss['loss_se'] = se_loss
+        return loss
diff --git a/mmseg/models/decode_heads/fcn_head.py b/mmseg/models/decode_heads/fcn_head.py
new file mode 100644
index 0000000000..e586a2e0d4
--- /dev/null
+++ b/mmseg/models/decode_heads/fcn_head.py
@@ -0,0 +1,69 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+
+
+@HEADS.register_module()
+class FCNHead(BaseDecodeHead):
+    """Fully Convolution Networks for Semantic Segmentation.
+
+    This head is the implementation of FCN: a stack of ``num_convs`` convs,
+    optionally followed by a conv over the concatenation of the head input
+    and the stack output, then the per-pixel classifier.
+
+    Args:
+        num_convs (int): Number of convs in the head. Default: 2.
+        kernel_size (int): The kernel size for convs in the head. Default: 3.
+        concat_input (bool): Whether concat the input and output of convs
+            before classification layer. Default: True.
+    """
+
+    def __init__(self,
+                 num_convs=2,
+                 kernel_size=3,
+                 concat_input=True,
+                 **kwargs):
+        assert num_convs > 0
+        self.num_convs = num_convs
+        self.concat_input = concat_input
+        super(FCNHead, self).__init__(**kwargs)
+        # Settings shared by every conv of this head.
+        shared_cfg = dict(
+            kernel_size=kernel_size,
+            padding=kernel_size // 2,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        # First conv maps in_channels -> channels, the rest keep channels.
+        stack = [ConvModule(self.in_channels, self.channels, **shared_cfg)]
+        for _ in range(num_convs - 1):
+            stack.append(
+                ConvModule(self.channels, self.channels, **shared_cfg))
+        self.convs = nn.Sequential(*stack)
+        if self.concat_input:
+            self.conv_cat = ConvModule(self.in_channels + self.channels,
+                                       self.channels, **shared_cfg)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        feat = self.convs(x)
+        if self.concat_input:
+            feat = self.conv_cat(torch.cat([x, feat], dim=1))
+        return self.cls_seg(feat)
diff --git a/mmseg/models/decode_heads/gc_head.py b/mmseg/models/decode_heads/gc_head.py
new file mode 100644
index 0000000000..3368663750
--- /dev/null
+++ b/mmseg/models/decode_heads/gc_head.py
@@ -0,0 +1,47 @@
+import torch
+from mmcv.cnn import ContextBlock
+
+from ..builder import HEADS
+from .fcn_head import FCNHead
+
+
+@HEADS.register_module()
+class GCHead(FCNHead):
+    """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond.
+
+    This head is the implementation of GCNet. A ``ContextBlock`` is inserted
+    between the two convs of the underlying FCN head.
+
+    Args:
+        ratio (float): Multiplier of channels ratio. Default: 1/4.
+        pooling_type (str): The pooling type of context aggregation.
+            Options are 'att', 'avg'. Default: 'att'.
+        fusion_types (tuple[str]): The fusion type for feature fusion.
+            Options are 'channel_add', 'channel_mul'.
+            Default: ('channel_add',)
+    """
+
+    def __init__(self,
+                 ratio=1 / 4.,
+                 pooling_type='att',
+                 fusion_types=('channel_add', ),
+                 **kwargs):
+        super(GCHead, self).__init__(num_convs=2, **kwargs)
+        self.ratio = ratio
+        self.pooling_type = pooling_type
+        self.fusion_types = fusion_types
+        block_cfg = dict(
+            in_channels=self.channels,
+            ratio=self.ratio,
+            pooling_type=self.pooling_type,
+            fusion_types=self.fusion_types)
+        self.gc_block = ContextBlock(**block_cfg)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        first_conv, second_conv = self.convs[0], self.convs[1]
+        # conv -> global context block -> conv
+        feat = second_conv(self.gc_block(first_conv(x)))
+        if self.concat_input:
+            feat = self.conv_cat(torch.cat([x, feat], dim=1))
+        return self.cls_seg(feat)
diff --git a/mmseg/models/decode_heads/nl_head.py b/mmseg/models/decode_heads/nl_head.py
new file mode 100644
index 0000000000..31658755a6
--- /dev/null
+++ b/mmseg/models/decode_heads/nl_head.py
@@ -0,0 +1,49 @@
+import torch
+from mmcv.cnn import NonLocal2d
+
+from ..builder import HEADS
+from .fcn_head import FCNHead
+
+
+@HEADS.register_module()
+class NLHead(FCNHead):
+    """Non-local Neural Networks.
+
+    This head is the implementation of NLNet. A ``NonLocal2d`` block is
+    inserted between the two convs of the underlying FCN head.
+
+    Args:
+        reduction (int): Reduction factor of projection transform. Default: 2.
+        use_scale (bool): Whether to scale pairwise_weight by
+            sqrt(1/inter_channels). Default: True.
+        mode (str): The nonlocal mode. Options are 'embedded_gaussian',
+            'dot_product'. Default: 'embedded_gaussian'.
+    """
+
+    def __init__(self,
+                 reduction=2,
+                 use_scale=True,
+                 mode='embedded_gaussian',
+                 **kwargs):
+        super(NLHead, self).__init__(num_convs=2, **kwargs)
+        self.reduction = reduction
+        self.use_scale = use_scale
+        self.mode = mode
+        block_cfg = dict(
+            in_channels=self.channels,
+            reduction=self.reduction,
+            use_scale=self.use_scale,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            mode=self.mode)
+        self.nl_block = NonLocal2d(**block_cfg)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        first_conv, second_conv = self.convs[0], self.convs[1]
+        # conv -> non-local block -> conv
+        feat = second_conv(self.nl_block(first_conv(x)))
+        if self.concat_input:
+            feat = self.conv_cat(torch.cat([x, feat], dim=1))
+        return self.cls_seg(feat)
diff --git a/mmseg/models/decode_heads/ocr_head.py b/mmseg/models/decode_heads/ocr_head.py
new file mode 100644
index 0000000000..e180e10276
--- /dev/null
+++ b/mmseg/models/decode_heads/ocr_head.py
@@ -0,0 +1,127 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+
+from mmseg.ops import resize
+from ..builder import HEADS
+from ..utils import SelfAttentionBlock as _SelfAttentionBlock
+from .cascade_decode_head import BaseCascadeDecodeHead
+
+
+class SpatialGatherModule(nn.Module):
+    """Aggregate the context features according to the initial predicted
+    probability distribution.
+
+    Employ the soft-weighted method to aggregate the context: for every
+    class, the features are averaged over all pixels with weights given by
+    a spatial softmax of the (scaled) class map.
+    """
+
+    def __init__(self, scale):
+        super(SpatialGatherModule, self).__init__()
+        self.scale = scale
+
+    def forward(self, feats, probs):
+        """Forward function.
+
+        Args:
+            feats (Tensor): Features, (batch_size, channels, height, width).
+            probs (Tensor): Class maps,
+                (batch_size, num_classes, height, width).
+
+        Returns:
+            Tensor: Context of shape (batch_size, channels, num_classes, 1).
+        """
+        batch_size, num_classes, _, _ = probs.size()
+        channels = feats.size(1)
+        # [batch_size, num_classes, height*width], softmax over pixels
+        weights = F.softmax(
+            self.scale * probs.view(batch_size, num_classes, -1), dim=2)
+        # [batch_size, height*width, channels]
+        pixel_feats = feats.view(batch_size, channels, -1).permute(0, 2, 1)
+        # [batch_size, num_classes, channels]
+        ocr_context = torch.matmul(weights, pixel_feats)
+        # [batch_size, channels, num_classes, 1]
+        return ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3)
+
+
+class ObjectAttentionBlock(_SelfAttentionBlock):
+    """Make a OCR used SelfAttentionBlock.
+
+    The attention context is concatenated with the query features and fused
+    by a 1x1 bottleneck conv.
+
+    Args:
+        in_channels (int): Channels of the query/key features; also the
+            output channels.
+        channels (int): Channels passed to the parent attention block.
+        scale (int): When larger than 1, the query is max-pooled by this
+            factor inside the attention block.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict): Config of activation layers.
+    """
+
+    def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg,
+                 act_cfg):
+        if scale > 1:
+            query_downsample = nn.MaxPool2d(kernel_size=scale)
+        else:
+            query_downsample = None
+        super(ObjectAttentionBlock, self).__init__(
+            key_in_channels=in_channels,
+            query_in_channels=in_channels,
+            channels=channels,
+            out_channels=in_channels,
+            share_key_query=False,
+            query_downsample=query_downsample,
+            key_downsample=None,
+            key_query_num_convs=2,
+            key_query_norm=True,
+            value_out_num_convs=1,
+            value_out_norm=True,
+            matmul_norm=True,
+            with_out=True,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        self.bottleneck = ConvModule(
+            in_channels * 2,
+            in_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+    def forward(self, query_feats, key_feats):
+        """Forward function.
+
+        Args:
+            query_feats (Tensor): Query feature map.
+            key_feats (Tensor): Key feature map.
+
+        Returns:
+            Tensor: Fused features at the resolution of ``query_feats``.
+        """
+        context = super(ObjectAttentionBlock,
+                        self).forward(query_feats, key_feats)
+        if self.query_downsample is not None:
+            # The previous code called ``resize(query_feats)`` here, which
+            # discarded the fused output and passed no target size. Bring
+            # the context back to the query resolution before fusing.
+            # NOTE(review): assumes the parent block returns the context at
+            # the downsampled query resolution when scale > 1 - confirm.
+            context = resize(
+                input=context,
+                size=query_feats.shape[2:],
+                mode='bilinear',
+                align_corners=False)
+        output = self.bottleneck(torch.cat([context, query_feats], dim=1))
+
+        return output
+
+
+@HEADS.register_module()
+class OCRHead(BaseCascadeDecodeHead):
+    """Object-Contextual Representations for Semantic Segmentation.
+
+    This head is the implementation of OCRNet. It is a cascade head: the
+    prediction of the previous head is used to gather per-class context,
+    which an object attention block then combines with the pixel features.
+
+    Args:
+        ocr_channels (int): The intermediate channels of OCR block.
+        scale (int): The scale of probability map in SpatialGatherModule.
+            Default: 1.
+    """
+
+    def __init__(self, ocr_channels, scale=1, **kwargs):
+        super(OCRHead, self).__init__(**kwargs)
+        self.ocr_channels = ocr_channels
+        self.scale = scale
+        shared_cfg = dict(
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.object_context_block = ObjectAttentionBlock(
+            self.channels, self.ocr_channels, self.scale, **shared_cfg)
+        self.spatial_gather_module = SpatialGatherModule(self.scale)
+
+        self.bottleneck = ConvModule(
+            self.in_channels, self.channels, 3, padding=1, **shared_cfg)
+
+    def forward(self, inputs, prev_output):
+        """Forward function.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+            prev_output (Tensor): The output of previous decode head.
+
+        Returns:
+            Tensor: The per-pixel classification logits.
+        """
+        x = self._transform_inputs(inputs)
+        pixel_feats = self.bottleneck(x)
+        # Per-class context gathered with the previous head's prediction.
+        class_context = self.spatial_gather_module(pixel_feats, prev_output)
+        refined = self.object_context_block(pixel_feats, class_context)
+        return self.cls_seg(refined)
diff --git a/mmseg/models/decode_heads/psa_head.py b/mmseg/models/decode_heads/psa_head.py
new file mode 100644
index 0000000000..8d915e57f4
--- /dev/null
+++ b/mmseg/models/decode_heads/psa_head.py
@@ -0,0 +1,196 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+
+from mmseg.ops import resize
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+
+try:
+ from mmcv.ops import PSAMask
+except ModuleNotFoundError:
+ PSAMask = None
+
+
+@HEADS.register_module()
+class PSAHead(BaseDecodeHead):
+ """Point-wise Spatial Attention Network for Scene Parsing.
+
+ This head is the implementation of `PSANet
+ <https://hszhao.github.io/papers/eccv18_psanet.pdf>`_.
+
+ Args:
+ mask_size (tuple[int]): The PSA mask size. It usually equals input
+ size.
+ psa_type (str): The type of psa module. Options are 'collect',
+ 'distribute', 'bi-direction'. Default: 'bi-direction'
+ compact (bool): Whether use compact map for 'collect' mode.
+ Default: False.
+ shrink_factor (int): The downsample factors of psa mask. Default: 2.
+ normalization_factor (float): The normalize factor of attention.
+ If None, ``mask_h * mask_w`` is used. Default: 1.0.
+ psa_softmax (bool): Whether use softmax for attention. Default: True.
+ """
+
+ def __init__(self,
+ mask_size,
+ psa_type='bi-direction',
+ compact=False,
+ shrink_factor=2,
+ normalization_factor=1.0,
+ psa_softmax=True,
+ **kwargs):
+ # PSAMask is set to None at import time when mmcv.ops is unavailable.
+ if PSAMask is None:
+ raise RuntimeError('Please install mmcv-full for PSAMask ops')
+ super(PSAHead, self).__init__(**kwargs)
+ assert psa_type in ['collect', 'distribute', 'bi-direction']
+ self.psa_type = psa_type
+ self.compact = compact
+ self.shrink_factor = shrink_factor
+ self.mask_size = mask_size
+ mask_h, mask_w = mask_size
+ self.psa_softmax = psa_softmax
+ # None falls back to the number of mask positions.
+ if normalization_factor is None:
+ normalization_factor = mask_h * mask_w
+ self.normalization_factor = normalization_factor
+
+ # Channel reduction followed by a branch predicting one attention
+ # channel per mask position (mask_h * mask_w).
+ self.reduce = ConvModule(
+ self.in_channels,
+ self.channels,
+ kernel_size=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg)
+ self.attention = nn.Sequential(
+ ConvModule(
+ self.channels,
+ self.channels,
+ kernel_size=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg),
+ nn.Conv2d(
+ self.channels, mask_h * mask_w, kernel_size=1, bias=False))
+ if psa_type == 'bi-direction':
+ # Second (``*_p``) reduce/attention pair; forward() uses it for the
+ # 'distribute' direction while the first pair serves 'collect'.
+ self.reduce_p = ConvModule(
+ self.in_channels,
+ self.channels,
+ kernel_size=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg)
+ self.attention_p = nn.Sequential(
+ ConvModule(
+ self.channels,
+ self.channels,
+ kernel_size=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg),
+ nn.Conv2d(
+ self.channels, mask_h * mask_w, kernel_size=1, bias=False))
+ self.psamask_collect = PSAMask('collect', mask_size)
+ self.psamask_distribute = PSAMask('distribute', mask_size)
+ else:
+ self.psamask = PSAMask(psa_type, mask_size)
+ # Projects the aggregated features (two concatenated directions for
+ # 'bi-direction') back to ``in_channels``.
+ # NOTE(review): kernel_size=1 together with padding=1 presumably grows
+ # the spatial size by 2; forward() resizes the result back to the input
+ # size afterwards -- confirm the padding is intentional.
+ self.proj = ConvModule(
+ self.channels * (2 if psa_type == 'bi-direction' else 1),
+ self.in_channels,
+ kernel_size=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg)
+ # Fuses the input (identity) with the projected attention features.
+ self.bottleneck = ConvModule(
+ self.in_channels * 2,
+ self.channels,
+ kernel_size=3,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg)
+
+ def forward(self, inputs):
+ """Forward function."""
+ x = self._transform_inputs(inputs)
+ identity = x
+ # May be overridden below when the features are shrunk; the final
+ # resize back to the input size reuses whatever value is set here.
+ align_corners = self.align_corners
+ if self.psa_type in ['collect', 'distribute']:
+ out = self.reduce(x)
+ n, c, h, w = out.size()
+ if self.shrink_factor != 1:
+ # Both h and w leave a remainder: use the ceil-style size
+ # ((x - 1) // f + 1) with align_corners=True; otherwise
+ # floor-divide with align_corners=False.
+ if h % self.shrink_factor and w % self.shrink_factor:
+ h = (h - 1) // self.shrink_factor + 1
+ w = (w - 1) // self.shrink_factor + 1
+ align_corners = True
+ else:
+ h = h // self.shrink_factor
+ w = w // self.shrink_factor
+ align_corners = False
+ out = resize(
+ out,
+ size=(h, w),
+ mode='bilinear',
+ align_corners=align_corners)
+ y = self.attention(out)
+ if self.compact:
+ # Compact 'collect' swaps the two h*w axes of the attention map;
+ # compact 'distribute' uses the map as predicted.
+ if self.psa_type == 'collect':
+ y = y.view(n, h * w,
+ h * w).transpose(1, 2).view(n, h * w, h, w)
+ else:
+ y = self.psamask(y)
+ if self.psa_softmax:
+ y = F.softmax(y, dim=1)
+ # Aggregate features with the attention map, then scale by
+ # 1 / normalization_factor.
+ out = torch.bmm(
+ out.view(n, c, h * w), y.view(n, h * w, h * w)).view(
+ n, c, h, w) * (1.0 / self.normalization_factor)
+ else:
+ # 'bi-direction': run the collect and distribute branches side by
+ # side and concatenate their results along the channel axis.
+ x_col = self.reduce(x)
+ x_dis = self.reduce_p(x)
+ n, c, h, w = x_col.size()
+ if self.shrink_factor != 1:
+ # Same shrink rule as in the single-direction branch above.
+ if h % self.shrink_factor and w % self.shrink_factor:
+ h = (h - 1) // self.shrink_factor + 1
+ w = (w - 1) // self.shrink_factor + 1
+ align_corners = True
+ else:
+ h = h // self.shrink_factor
+ w = w // self.shrink_factor
+ align_corners = False
+ x_col = resize(
+ x_col,
+ size=(h, w),
+ mode='bilinear',
+ align_corners=align_corners)
+ x_dis = resize(
+ x_dis,
+ size=(h, w),
+ mode='bilinear',
+ align_corners=align_corners)
+ y_col = self.attention(x_col)
+ y_dis = self.attention_p(x_dis)
+ if self.compact:
+ # In compact mode only the distribute map has its h*w axes swapped.
+ y_dis = y_dis.view(n, h * w,
+ h * w).transpose(1, 2).view(n, h * w, h, w)
+ else:
+ y_col = self.psamask_collect(y_col)
+ y_dis = self.psamask_distribute(y_dis)
+ if self.psa_softmax:
+ y_col = F.softmax(y_col, dim=1)
+ y_dis = F.softmax(y_dis, dim=1)
+ x_col = torch.bmm(
+ x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view(
+ n, c, h, w) * (1.0 / self.normalization_factor)
+ x_dis = torch.bmm(
+ x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view(
+ n, c, h, w) * (1.0 / self.normalization_factor)
+ out = torch.cat([x_col, x_dis], 1)
+ out = self.proj(out)
+ # Bring the attention features back to the spatial size of the input
+ # before fusing them with it.
+ out = resize(
+ out,
+ size=identity.shape[2:],
+ mode='bilinear',
+ align_corners=align_corners)
+ out = self.bottleneck(torch.cat((identity, out), dim=1))
+ out = self.cls_seg(out)
+ return out
diff --git a/mmseg/models/decode_heads/psp_head.py b/mmseg/models/decode_heads/psp_head.py
new file mode 100644
index 0000000000..bdbe2c8ac8
--- /dev/null
+++ b/mmseg/models/decode_heads/psp_head.py
@@ -0,0 +1,101 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from mmseg.ops import resize
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+
+
+class PPM(nn.ModuleList):
+    """Pooling Pyramid Module used in PSPNet.
+
+    Each entry of the list is an adaptive average pooling to one pooling
+    scale followed by a 1x1 ``ConvModule``.
+
+    Args:
+        pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+            Module.
+        in_channels (int): Input channels.
+        channels (int): Channels after modules, before conv_seg.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict): Config of activation layers.
+        align_corners (bool): align_corners argument of F.interpolate.
+    """
+
+    def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg,
+                 act_cfg, align_corners):
+        super().__init__()
+        self.pool_scales = pool_scales
+        self.align_corners = align_corners
+        self.in_channels = in_channels
+        self.channels = channels
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        for scale in pool_scales:
+            pooling = nn.AdaptiveAvgPool2d(scale)
+            projection = ConvModule(
+                self.in_channels,
+                self.channels,
+                1,
+                conv_cfg=self.conv_cfg,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg)
+            self.append(nn.Sequential(pooling, projection))
+
+    def forward(self, x):
+        """Forward function.
+
+        Returns a list with one tensor per pooling scale, each resized
+        bilinearly to the spatial size of ``x``.
+        """
+        target_size = x.size()[2:]
+        return [
+            resize(
+                branch(x),
+                size=target_size,
+                mode='bilinear',
+                align_corners=self.align_corners) for branch in self
+        ]
+
+
+@HEADS.register_module()
+class PSPHead(BaseDecodeHead):
+    """Pyramid Scene Parsing Network.
+
+    This head is the implementation of
+    `PSPNet <https://arxiv.org/abs/1612.01105>`_.
+
+    Args:
+        pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+            Module. Default: (1, 2, 3, 6).
+    """
+
+    def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
+        super().__init__(**kwargs)
+        assert isinstance(pool_scales, (list, tuple))
+        self.pool_scales = pool_scales
+        layer_cfg = dict(
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.psp_modules = PPM(
+            self.pool_scales,
+            self.in_channels,
+            self.channels,
+            align_corners=self.align_corners,
+            **layer_cfg)
+        # the bottleneck sees the input plus one pooled branch per scale
+        fused_channels = self.in_channels + len(pool_scales) * self.channels
+        self.bottleneck = ConvModule(
+            fused_channels, self.channels, 3, padding=1, **layer_cfg)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        pyramid = torch.cat([x, *self.psp_modules(x)], dim=1)
+        return self.cls_seg(self.bottleneck(pyramid))
diff --git a/mmseg/models/decode_heads/sep_aspp_head.py b/mmseg/models/decode_heads/sep_aspp_head.py
new file mode 100644
index 0000000000..71881890bd
--- /dev/null
+++ b/mmseg/models/decode_heads/sep_aspp_head.py
@@ -0,0 +1,101 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from mmseg.ops import DepthwiseSeparableConvModule, resize
+from ..builder import HEADS
+from .aspp_head import ASPPHead, ASPPModule
+
+
+class DepthwiseSeparableASPPModule(ASPPModule):
+    """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable
+    conv.
+
+    After the parent has built its branches, every branch whose dilation is
+    greater than 1 is replaced by a ``DepthwiseSeparableConvModule``.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        for idx, rate in enumerate(self.dilations):
+            if rate <= 1:
+                # branches with dilation <= 1 are kept as built by the parent
+                continue
+            self[idx] = DepthwiseSeparableConvModule(
+                self.in_channels,
+                self.channels,
+                3,
+                dilation=rate,
+                padding=rate,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg)
+
+
+@HEADS.register_module()
+class DepthwiseSeparableASPPHead(ASPPHead):
+    """Encoder-Decoder with Atrous Separable Convolution for Semantic Image
+    Segmentation.
+
+    This head is the implementation of `DeepLabV3+
+    <https://arxiv.org/abs/1802.02611>`_.
+
+    Args:
+        c1_in_channels (int): The input channels of c1 decoder. If is 0,
+            the no decoder will be used.
+        c1_channels (int): The intermediate channels of c1 decoder. Ignored
+            when ``c1_in_channels`` is 0.
+    """
+
+    def __init__(self, c1_in_channels, c1_channels, **kwargs):
+        super(DepthwiseSeparableASPPHead, self).__init__(**kwargs)
+        assert c1_in_channels >= 0
+        self.aspp_modules = DepthwiseSeparableASPPModule(
+            dilations=self.dilations,
+            in_channels=self.in_channels,
+            channels=self.channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        if c1_in_channels > 0:
+            self.c1_bottleneck = ConvModule(
+                c1_in_channels,
+                c1_channels,
+                1,
+                conv_cfg=self.conv_cfg,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg)
+            fused_channels = self.channels + c1_channels
+        else:
+            self.c1_bottleneck = None
+            # Without the c1 branch forward() feeds only the ASPP output
+            # (``self.channels`` channels) into ``sep_bottleneck``, so its
+            # first conv must not expect the extra ``c1_channels``.
+            fused_channels = self.channels
+        self.sep_bottleneck = nn.Sequential(
+            DepthwiseSeparableConvModule(
+                fused_channels,
+                self.channels,
+                3,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            DepthwiseSeparableConvModule(
+                self.channels,
+                self.channels,
+                3,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg))
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        # image-level pooling branch, resized back to the feature size
+        aspp_outs = [
+            resize(
+                self.image_pool(x),
+                size=x.size()[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+        ]
+        aspp_outs.extend(self.aspp_modules(x))
+        aspp_outs = torch.cat(aspp_outs, dim=1)
+        output = self.bottleneck(aspp_outs)
+        if self.c1_bottleneck is not None:
+            # fuse with the first input feature map at its resolution
+            c1_output = self.c1_bottleneck(inputs[0])
+            output = resize(
+                input=output,
+                size=c1_output.shape[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+            output = torch.cat([output, c1_output], dim=1)
+        output = self.sep_bottleneck(output)
+        output = self.cls_seg(output)
+        return output
diff --git a/mmseg/models/decode_heads/uper_head.py b/mmseg/models/decode_heads/uper_head.py
new file mode 100644
index 0000000000..bb617f6b13
--- /dev/null
+++ b/mmseg/models/decode_heads/uper_head.py
@@ -0,0 +1,126 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from mmseg.ops import resize
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+from .psp_head import PPM
+
+
+@HEADS.register_module()
+class UPerHead(BaseDecodeHead):
+    """Unified Perceptual Parsing for Scene Understanding.
+
+    This head is the implementation of `UPerNet
+    <https://arxiv.org/abs/1807.10221>`_.
+
+    Args:
+        pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+            Module applied on the last feature. Default: (1, 2, 3, 6).
+    """
+
+    def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
+        super().__init__(input_transform='multiple_select', **kwargs)
+        layer_cfg = dict(
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        top_channels = self.in_channels[-1]
+        # PSP Module (applied to the last input feature only)
+        self.psp_modules = PPM(
+            pool_scales,
+            top_channels,
+            self.channels,
+            align_corners=self.align_corners,
+            **layer_cfg)
+        self.bottleneck = ConvModule(
+            top_channels + len(pool_scales) * self.channels,
+            self.channels,
+            3,
+            padding=1,
+            **layer_cfg)
+        # FPN Module
+        self.lateral_convs = nn.ModuleList()
+        self.fpn_convs = nn.ModuleList()
+        for level_channels in self.in_channels[:-1]:  # skip the top layer
+            self.lateral_convs.append(
+                ConvModule(
+                    level_channels,
+                    self.channels,
+                    1,
+                    inplace=False,
+                    **layer_cfg))
+            self.fpn_convs.append(
+                ConvModule(
+                    self.channels,
+                    self.channels,
+                    3,
+                    padding=1,
+                    inplace=False,
+                    **layer_cfg))
+
+        self.fpn_bottleneck = ConvModule(
+            len(self.in_channels) * self.channels,
+            self.channels,
+            3,
+            padding=1,
+            **layer_cfg)
+
+    def psp_forward(self, inputs):
+        """Forward function of PSP module."""
+        top = inputs[-1]
+        pyramid = torch.cat([top, *self.psp_modules(top)], dim=1)
+        return self.bottleneck(pyramid)
+
+    def forward(self, inputs):
+        """Forward function."""
+        inputs = self._transform_inputs(inputs)
+
+        # lateral connections for every level below the top one, with the
+        # PSP output taking the place of the top level
+        laterals = [
+            self.lateral_convs[idx](inputs[idx])
+            for idx in range(len(self.lateral_convs))
+        ]
+        laterals.append(self.psp_forward(inputs))
+        num_levels = len(laterals)
+
+        # top-down path: add each level, upsampled, into the level below
+        for upper in reversed(range(1, num_levels)):
+            lower = upper - 1
+            laterals[lower] += resize(
+                laterals[upper],
+                size=laterals[lower].shape[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+
+        # one fpn conv per lower level; the PSP feature is appended as is
+        fpn_outs = [
+            self.fpn_convs[idx](laterals[idx])
+            for idx in range(num_levels - 1)
+        ]
+        fpn_outs.append(laterals[-1])
+
+        # bring every coarser level to the size of the first output
+        out_size = fpn_outs[0].shape[2:]
+        for idx in range(num_levels - 1, 0, -1):
+            fpn_outs[idx] = resize(
+                fpn_outs[idx],
+                size=out_size,
+                mode='bilinear',
+                align_corners=self.align_corners)
+        fused = self.fpn_bottleneck(torch.cat(fpn_outs, dim=1))
+        return self.cls_seg(fused)
diff --git a/mmseg/models/losses/__init__.py b/mmseg/models/losses/__init__.py
new file mode 100644
index 0000000000..225bdde393
--- /dev/null
+++ b/mmseg/models/losses/__init__.py
@@ -0,0 +1,10 @@
+from .accuracy import Accuracy, accuracy
+from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
+ cross_entropy, mask_cross_entropy)
+from .utils import reduce_loss, weight_reduce_loss, weighted_loss
+
+__all__ = [
+ 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
+ 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss',
+ 'weight_reduce_loss', 'weighted_loss'
+]
diff --git a/mmseg/models/losses/accuracy.py b/mmseg/models/losses/accuracy.py
new file mode 100644
index 0000000000..e45f9ec485
--- /dev/null
+++ b/mmseg/models/losses/accuracy.py
@@ -0,0 +1,78 @@
+import torch.nn as nn
+
+
+def accuracy(pred, target, topk=1, thresh=None):
+    """Calculate accuracy according to the prediction and target.
+
+    Args:
+        pred (torch.Tensor): The model prediction, shape (N, num_class, ...)
+        target (torch.Tensor): The target of each prediction, shape (N, ...)
+        topk (int | tuple[int], optional): If the predictions in ``topk``
+            matches the target, the predictions will be regarded as
+            correct ones. Defaults to 1.
+        thresh (float, optional): If not None, predictions with scores under
+            this threshold are considered incorrect. Default to None.
+
+    Returns:
+        float | tuple[float]: If the input ``topk`` is a single integer,
+            the function will return a single float as accuracy. If
+            ``topk`` is a tuple containing multiple integers, the
+            function will return a tuple containing accuracies of
+            each ``topk`` number.
+    """
+    assert isinstance(topk, (int, tuple))
+    if isinstance(topk, int):
+        topk = (topk, )
+        return_single = True
+    else:
+        return_single = False
+
+    maxk = max(topk)
+    if pred.size(0) == 0:
+        # empty batch: report zero accuracy for every requested k
+        accu = [pred.new_tensor(0.) for _ in range(len(topk))]
+        return accu[0] if return_single else accu
+    assert pred.ndim == target.ndim + 1
+    assert pred.size(0) == target.size(0)
+    assert maxk <= pred.size(1), \
+        f'maxk {maxk} exceeds pred dimension {pred.size(1)}'
+    pred_value, pred_label = pred.topk(maxk, dim=1)
+    # transpose to shape (maxk, N, ...)
+    pred_label = pred_label.transpose(0, 1)
+    correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label))
+    if thresh is not None:
+        # Only prediction values larger than thresh are counted as correct.
+        # ``transpose(0, 1)`` (rather than ``.t()``, which only accepts
+        # tensors of up to 2 dimensions) keeps this valid for predictions
+        # with trailing spatial dimensions.
+        correct = correct & (pred_value > thresh).transpose(0, 1)
+    res = []
+    for k in topk:
+        # ``reshape`` also copes with a non-contiguous ``correct``
+        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
+        res.append(correct_k.mul_(100.0 / target.numel()))
+    return res[0] if return_single else res
+
+
+class Accuracy(nn.Module):
+    """Accuracy calculation module."""
+
+    def __init__(self, topk=(1, ), thresh=None):
+        """Module to calculate the accuracy.
+
+        Args:
+            topk (tuple, optional): The criterion used to calculate the
+                accuracy. Defaults to (1,).
+            thresh (float, optional): If not None, predictions with scores
+                under this threshold are considered incorrect. Default to
+                None.
+        """
+        super().__init__()
+        self.topk, self.thresh = topk, thresh
+
+    def forward(self, pred, target):
+        """Forward function to calculate accuracy.
+
+        Delegates to :func:`accuracy` with the stored ``topk`` and
+        ``thresh``.
+
+        Args:
+            pred (torch.Tensor): Prediction of models.
+            target (torch.Tensor): Target for each prediction.
+
+        Returns:
+            tuple[float]: The accuracies under different topk criterions.
+        """
+        return accuracy(pred, target, topk=self.topk, thresh=self.thresh)
diff --git a/mmseg/models/losses/cross_entropy_loss.py b/mmseg/models/losses/cross_entropy_loss.py
new file mode 100644
index 0000000000..dcd9f1c894
--- /dev/null
+++ b/mmseg/models/losses/cross_entropy_loss.py
@@ -0,0 +1,179 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import LOSSES
+from .utils import weight_reduce_loss
+
+
+def cross_entropy(pred,
+                  label,
+                  weight=None,
+                  class_weight=None,
+                  reduction='mean',
+                  avg_factor=None,
+                  ignore_index=-100):
+    """The wrapper function for :func:`F.cross_entropy`"""
+    # ``class_weight`` is the per-class rescaling weight forwarded to
+    # F.cross_entropy; the loss is kept un-reduced at this point.
+    elementwise = F.cross_entropy(
+        pred,
+        label,
+        weight=class_weight,
+        reduction='none',
+        ignore_index=ignore_index)
+
+    # the element-wise ``weight`` is applied during the reduction step
+    sample_weight = None if weight is None else weight.float()
+    return weight_reduce_loss(
+        elementwise,
+        weight=sample_weight,
+        reduction=reduction,
+        avg_factor=avg_factor)
+
+
+def _expand_onehot_labels(labels, label_weights, label_channels):
+    """Expand onehot labels to match the size of prediction.
+
+    Labels ``>= 1`` set column ``label - 1`` of their row to 1; all other
+    rows stay zero. ``label_weights``, if given, is broadcast across the
+    ``label_channels`` columns.
+    """
+    num_samples = labels.size(0)
+    bin_labels = labels.new_full((num_samples, label_channels), 0)
+    positive = torch.nonzero(labels >= 1, as_tuple=False).squeeze()
+    if positive.numel() > 0:
+        bin_labels[positive, labels[positive] - 1] = 1
+
+    if label_weights is not None:
+        bin_label_weights = label_weights.view(-1, 1).expand(
+            label_weights.size(0), label_channels)
+    else:
+        bin_label_weights = None
+    return bin_labels, bin_label_weights
+
+
+def binary_cross_entropy(pred,
+                         label,
+                         weight=None,
+                         reduction='mean',
+                         avg_factor=None,
+                         class_weight=None):
+    """Calculate the binary CrossEntropy loss.
+
+    Args:
+        pred (torch.Tensor): The prediction with shape (N, 1).
+        label (torch.Tensor): The learning label of the prediction.
+        weight (torch.Tensor, optional): Sample-wise loss weight.
+        reduction (str, optional): The method used to reduce the loss.
+            Options are "none", "mean" and "sum".
+        avg_factor (int, optional): Average factor that is used to average
+            the loss. Defaults to None.
+        class_weight (list[float], optional): The weight for each class.
+
+    Returns:
+        torch.Tensor: The calculated loss
+    """
+    # labels with a different rank than the prediction are expanded to
+    # one-hot form (together with their weights) first
+    if pred.dim() != label.dim():
+        label, weight = _expand_onehot_labels(label, weight, pred.size(-1))
+
+    if weight is not None:
+        weight = weight.float()
+
+    # un-reduced element-wise loss
+    elementwise = F.binary_cross_entropy_with_logits(
+        pred, label.float(), weight=class_weight, reduction='none')
+    # apply the element-wise weight and reduce
+    return weight_reduce_loss(
+        elementwise, weight, reduction=reduction, avg_factor=avg_factor)
+
+
+def mask_cross_entropy(pred,
+                       target,
+                       label,
+                       reduction='mean',
+                       avg_factor=None,
+                       class_weight=None):
+    """Calculate the CrossEntropy loss for masks.
+
+    Args:
+        pred (torch.Tensor): The prediction with shape (N, C), C is the number
+            of classes.
+        target (torch.Tensor): The learning label of the prediction.
+        label (torch.Tensor): ``label`` indicates the class label of the
+            mask's corresponding object. It is used to select the mask of
+            the class which the object belongs to when the mask prediction
+            is not class-agnostic.
+        reduction (str, optional): The method used to reduce the loss.
+            Options are "none", "mean" and "sum".
+        avg_factor (int, optional): Average factor that is used to average
+            the loss. Defaults to None.
+        class_weight (list[float], optional): The weight for each class.
+
+    Returns:
+        torch.Tensor: The calculated loss
+    """
+    # TODO: handle these two reserved arguments
+    assert reduction == 'mean' and avg_factor is None
+    roi_inds = torch.arange(
+        0, pred.size()[0], dtype=torch.long, device=pred.device)
+    # pick, for every sample, the prediction of its own class
+    selected = pred[roi_inds, label].squeeze(1)
+    loss = F.binary_cross_entropy_with_logits(
+        selected, target, weight=class_weight, reduction='mean')
+    # returned with a leading dimension of size 1
+    return loss[None]
+
+
+@LOSSES.register_module()
+class CrossEntropyLoss(nn.Module):
+    """CrossEntropyLoss.
+
+    Args:
+        use_sigmoid (bool, optional): Whether the prediction uses sigmoid
+            of softmax. Defaults to False.
+        use_mask (bool, optional): Whether to use mask cross entropy loss.
+            Defaults to False.
+        reduction (str, optional): . Defaults to 'mean'.
+            Options are "none", "mean" and "sum".
+        class_weight (list[float], optional): Weight of each class.
+            Defaults to None.
+        loss_weight (float, optional): Weight of the loss. Defaults to 1.0.
+    """
+
+    def __init__(self,
+                 use_sigmoid=False,
+                 use_mask=False,
+                 reduction='mean',
+                 class_weight=None,
+                 loss_weight=1.0):
+        super().__init__()
+        # sigmoid and mask modes are mutually exclusive
+        assert (use_sigmoid is False) or (use_mask is False)
+        self.use_sigmoid = use_sigmoid
+        self.use_mask = use_mask
+        self.reduction = reduction
+        self.loss_weight = loss_weight
+        self.class_weight = class_weight
+
+        # pick the functional criterion once, at construction time
+        if self.use_sigmoid:
+            criterion = binary_cross_entropy
+        elif self.use_mask:
+            criterion = mask_cross_entropy
+        else:
+            criterion = cross_entropy
+        self.cls_criterion = criterion
+
+    def forward(self,
+                cls_score,
+                label,
+                weight=None,
+                avg_factor=None,
+                reduction_override=None,
+                **kwargs):
+        """Forward function.
+
+        ``reduction_override``, when set, replaces the reduction chosen at
+        construction time for this call only.
+        """
+        assert reduction_override in (None, 'none', 'mean', 'sum')
+        reduction = reduction_override or self.reduction
+        class_weight = None
+        if self.class_weight is not None:
+            # same dtype/device as the scores
+            class_weight = cls_score.new_tensor(self.class_weight)
+        raw_loss = self.cls_criterion(
+            cls_score,
+            label,
+            weight,
+            class_weight=class_weight,
+            reduction=reduction,
+            avg_factor=avg_factor,
+            **kwargs)
+        return self.loss_weight * raw_loss
diff --git a/mmseg/models/losses/utils.py b/mmseg/models/losses/utils.py
new file mode 100644
index 0000000000..a1153fa9f3
--- /dev/null
+++ b/mmseg/models/losses/utils.py
@@ -0,0 +1,101 @@
+import functools
+
+import torch.nn.functional as F
+
+
+def reduce_loss(loss, reduction):
+    """Reduce loss as specified.
+
+    Args:
+        loss (Tensor): Elementwise loss tensor.
+        reduction (str): Options are "none", "mean" and "sum".
+
+    Return:
+        Tensor: Reduced loss tensor.
+    """
+    # enum codes -- none: 0, elementwise_mean: 1, sum: 2
+    reducers = {
+        0: lambda tensor: tensor,
+        1: lambda tensor: tensor.mean(),
+        2: lambda tensor: tensor.sum(),
+    }
+    reducer = reducers.get(F._Reduction.get_enum(reduction))
+    return None if reducer is None else reducer(loss)
+
+
+def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
+    """Apply element-wise weight and reduce loss.
+
+    Args:
+        loss (Tensor): Element-wise loss.
+        weight (Tensor): Element-wise weights.
+        reduction (str): Same as built-in losses of PyTorch.
+        avg_factor (float): Average factor when computing the mean of losses.
+
+    Returns:
+        Tensor: Processed loss values.
+    """
+    if weight is not None:
+        # the weight must have the loss' rank and either one channel or
+        # as many channels as the loss
+        assert weight.dim() == loss.dim()
+        if weight.dim() > 1:
+            assert weight.size(1) == 1 or weight.size(1) == loss.size(1)
+        loss = loss * weight
+
+    # without an explicit averaging factor this is a plain reduction
+    if avg_factor is None:
+        return reduce_loss(loss, reduction)
+
+    # 'mean' divides the summed loss by avg_factor, 'none' leaves the
+    # loss untouched, anything else is rejected
+    if reduction == 'mean':
+        return loss.sum() / avg_factor
+    if reduction != 'none':
+        raise ValueError('avg_factor can not be used with reduction="sum"')
+    return loss
+
+
+def weighted_loss(loss_func):
+    """Create a weighted version of a given loss function.
+
+    To use this decorator, the loss function must have the signature like
+    `loss_func(pred, target, **kwargs)`. The function only needs to compute
+    element-wise loss without any reduction. This decorator will add weight
+    and reduction arguments to the function. The decorated function will have
+    the signature like `loss_func(pred, target, weight=None, reduction='mean',
+    avg_factor=None, **kwargs)`.
+
+    :Example:
+
+    >>> import torch
+    >>> @weighted_loss
+    >>> def l1_loss(pred, target):
+    >>>     return (pred - target).abs()
+
+    >>> pred = torch.Tensor([0, 2, 3])
+    >>> target = torch.Tensor([1, 1, 1])
+    >>> weight = torch.Tensor([1, 0, 1])
+
+    >>> l1_loss(pred, target)
+    tensor(1.3333)
+    >>> l1_loss(pred, target, weight)
+    tensor(1.)
+    >>> l1_loss(pred, target, reduction='none')
+    tensor([1., 1., 2.])
+    >>> l1_loss(pred, target, weight, avg_factor=2)
+    tensor(1.5000)
+    """
+
+    @functools.wraps(loss_func)
+    def wrapper(pred,
+                target,
+                weight=None,
+                reduction='mean',
+                avg_factor=None,
+                **kwargs):
+        # element-wise loss first, weighting and reduction afterwards
+        elementwise = loss_func(pred, target, **kwargs)
+        return weight_reduce_loss(elementwise, weight, reduction, avg_factor)
+
+    return wrapper
diff --git a/mmseg/models/segmentors/__init__.py b/mmseg/models/segmentors/__init__.py
new file mode 100644
index 0000000000..3f600ecb9f
--- /dev/null
+++ b/mmseg/models/segmentors/__init__.py
@@ -0,0 +1,4 @@
+from .cascade_encoder_decoder import CascadeEncoderDecoder
+from .encoder_decoder import EncoderDecoder
+
+__all__ = ['EncoderDecoder', 'CascadeEncoderDecoder']
diff --git a/mmseg/models/segmentors/base.py b/mmseg/models/segmentors/base.py
new file mode 100644
index 0000000000..4f31127210
--- /dev/null
+++ b/mmseg/models/segmentors/base.py
@@ -0,0 +1,267 @@
+import logging
+import warnings
+from abc import ABCMeta, abstractmethod
+from collections import OrderedDict
+
+import mmcv
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+
+
+class BaseSegmentor(nn.Module):
+    """Base class for segmentors.
+
+    Subclasses provide feature extraction, training and testing entry
+    points; this class supplies the train/test dispatch, loss parsing and
+    result visualisation built on top of them.
+    """
+
+    # NOTE(review): Python 3 does not treat a ``__metaclass__`` attribute as
+    # the metaclass, so the ``@abstractmethod`` markers below are not
+    # enforced at instantiation time -- confirm whether that is intended.
+    __metaclass__ = ABCMeta
+
+    def __init__(self):
+        super(BaseSegmentor, self).__init__()
+
+    @property
+    def with_neck(self):
+        """bool: whether the segmentor has neck"""
+        return hasattr(self, 'neck') and self.neck is not None
+
+    @property
+    def with_auxiliary_head(self):
+        """bool: whether the segmentor has auxiliary head"""
+        return hasattr(self,
+                       'auxiliary_head') and self.auxiliary_head is not None
+
+    @property
+    def with_decode_head(self):
+        """bool: whether the segmentor has decode head"""
+        return hasattr(self, 'decode_head') and self.decode_head is not None
+
+    @abstractmethod
+    def extract_feat(self, imgs):
+        """Placeholder for extract features from images."""
+        pass
+
+    @abstractmethod
+    def encode_decode(self, img, img_metas):
+        """Placeholder for encode images with backbone and decode into a
+        semantic segmentation map of the same size as input."""
+        pass
+
+    @abstractmethod
+    def forward_train(self, imgs, img_metas, **kwargs):
+        """Placeholder for Forward function for training."""
+        pass
+
+    @abstractmethod
+    def simple_test(self, img, img_meta, **kwargs):
+        """Placeholder for single image test."""
+        pass
+
+    @abstractmethod
+    def aug_test(self, imgs, img_metas, **kwargs):
+        """Placeholder for augmentation test."""
+        pass
+
+    def init_weights(self, pretrained=None):
+        """Initialize the weights in segmentor.
+
+        The base implementation only logs the path of the pre-trained
+        weights when one is given.
+
+        Args:
+            pretrained (str, optional): Path to pre-trained weights.
+                Defaults to None.
+        """
+        if pretrained is not None:
+            logger = logging.getLogger()
+            logger.info(f'load model from: {pretrained}')
+
+    def forward_test(self, imgs, img_metas, **kwargs):
+        """Validate the test inputs and dispatch to the matching test method.
+
+        Args:
+            imgs (List[Tensor]): the outer list indicates test-time
+                augmentations and inner Tensor should have a shape NxCxHxW,
+                which contains all images in the batch.
+            img_metas (List[List[dict]]): the outer list indicates test-time
+                augs (multiscale, flip, etc.) and the inner list indicates
+                images in a batch.
+
+        Raises:
+            TypeError: If ``imgs`` or ``img_metas`` is not a list.
+            ValueError: If the two lists differ in length.
+        """
+        for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
+            if not isinstance(var, list):
+                raise TypeError(f'{name} must be a list, but got '
+                                f'{type(var)}')
+
+        num_augs = len(imgs)
+        if num_augs != len(img_metas):
+            raise ValueError(f'num of augmentations ({len(imgs)}) != '
+                             f'num of image meta ({len(img_metas)})')
+        # all images in the same aug batch must share the same ori_shape,
+        # img_shape and pad_shape
+        for img_meta in img_metas:
+            for key in ('ori_shape', 'img_shape', 'pad_shape'):
+                shapes = [meta[key] for meta in img_meta]
+                assert all(shape == shapes[0] for shape in shapes)
+
+        if num_augs == 1:
+            return self.simple_test(imgs[0], img_metas[0], **kwargs)
+        else:
+            return self.aug_test(imgs, img_metas, **kwargs)
+
+    def forward(self, img, img_metas, return_loss=True, **kwargs):
+        """Calls either :func:`forward_train` or :func:`forward_test` depending
+        on whether ``return_loss`` is ``True``.
+
+        Note this setting will change the expected inputs. When
+        ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor
+        and List[dict]), and when ``return_loss=False``, img and img_meta
+        should be double nested (i.e. List[Tensor], List[List[dict]]), with
+        the outer list indicating test time augmentations.
+        """
+        if return_loss:
+            return self.forward_train(img, img_metas, **kwargs)
+        else:
+            return self.forward_test(img, img_metas, **kwargs)
+
+    def train_step(self, data_batch, optimizer, **kwargs):
+        """The iteration step during training.
+
+        This method defines an iteration step during training, except for the
+        back propagation and optimizer updating, which are done in an optimizer
+        hook. Note that in some complicated cases or models, the whole process
+        including back propagation and optimizer updating is also defined in
+        this method, such as GAN.
+
+        Args:
+            data_batch (dict): The output of dataloader.
+            optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
+                runner is passed to ``train_step()``. This argument is unused
+                and reserved.
+
+        Returns:
+            dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
+                ``num_samples``.
+                ``loss`` is a tensor for back propagation, which can be a
+                weighted sum of multiple losses.
+                ``log_vars`` contains all the variables to be sent to the
+                logger.
+                ``num_samples`` indicates the batch size (when the model is
+                DDP, it means the batch size on each GPU), which is used for
+                averaging the logs.
+        """
+        losses = self.forward_train(**data_batch, **kwargs)
+        loss, log_vars = self._parse_losses(losses)
+
+        outputs = dict(
+            loss=loss,
+            log_vars=log_vars,
+            num_samples=len(data_batch['img'].data))
+
+        return outputs
+
+    def val_step(self, data_batch, **kwargs):
+        """The iteration step during validation.
+
+        This method shares the same signature as :func:`train_step`, but used
+        during val epochs. Note that the evaluation after training epochs is
+        not implemented with this method, but an evaluation hook.
+        """
+        output = self.forward_test(**data_batch, **kwargs)
+        return output
+
+    @staticmethod
+    def _parse_losses(losses):
+        """Parse the raw outputs (losses) of the network.
+
+        Args:
+            losses (dict): Raw output of the network, which usually contain
+                losses and other necessary information.
+
+        Returns:
+            tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor
+                which may be a weighted sum of all losses, log_vars contains
+                all the variables to be sent to the logger.
+
+        Raises:
+            TypeError: If a value is neither a tensor nor a list of tensors.
+        """
+        log_vars = OrderedDict()
+        for loss_name, loss_value in losses.items():
+            if isinstance(loss_value, torch.Tensor):
+                log_vars[loss_name] = loss_value.mean()
+            elif isinstance(loss_value, list):
+                log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
+            else:
+                raise TypeError(
+                    f'{loss_name} is not a tensor or list of tensors')
+
+        # only entries whose key contains 'loss' contribute to the total
+        loss = sum(_value for _key, _value in log_vars.items()
+                   if 'loss' in _key)
+
+        log_vars['loss'] = loss
+        for loss_name, loss_value in log_vars.items():
+            # reduce loss when distributed training
+            if dist.is_available() and dist.is_initialized():
+                loss_value = loss_value.data.clone()
+                dist.all_reduce(loss_value.div_(dist.get_world_size()))
+            log_vars[loss_name] = loss_value.item()
+
+        return loss, log_vars
+
+    def show_result(self,
+                    img,
+                    result,
+                    palette=None,
+                    win_name='',
+                    show=False,
+                    wait_time=0,
+                    out_file=None):
+        """Draw `result` over `img`.
+
+        Args:
+            img (str or Tensor): The image to be displayed.
+            result (Tensor): The semantic segmentation results to draw over
+                `img`.
+            palette (list[list[int]]] | np.ndarray | None): The palette of
+                segmentation map. If None is given, ``self.PALETTE`` is
+                used, or a random palette if that is None too.
+                Default: None
+            win_name (str): The window name.
+            wait_time (int): Value of waitKey param.
+                Default: 0.
+            show (bool): Whether to show the image.
+                Default: False.
+            out_file (str or None): The filename to write the image.
+                Default: None.
+
+        Returns:
+            img (Tensor): Only if not `show` or `out_file`
+        """
+        img = mmcv.imread(img)
+        img = img.copy()
+        seg = result[0]
+        if palette is None:
+            if self.PALETTE is None:
+                palette = np.random.randint(
+                    0, 255, size=(len(self.CLASSES), 3))
+            else:
+                palette = self.PALETTE
+        # Convert unconditionally: a palette taken from ``self.PALETTE`` may
+        # be a plain list just like an explicitly passed one, and the shape
+        # checks below need an ndarray.
+        palette = np.array(palette)
+        assert palette.shape[0] == len(self.CLASSES)
+        assert palette.shape[1] == 3
+        assert len(palette.shape) == 2
+        color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
+        for label, color in enumerate(palette):
+            color_seg[seg == label, :] = color
+        # convert to BGR
+        color_seg = color_seg[..., ::-1]
+
+        # blend image and colour map with equal weight
+        img = img * 0.5 + color_seg * 0.5
+        img = img.astype(np.uint8)
+        # if out_file specified, do not show image in window
+        if out_file is not None:
+            show = False
+
+        if show:
+            mmcv.imshow(img, win_name, wait_time)
+        if out_file is not None:
+            mmcv.imwrite(img, out_file)
+
+        if not (show or out_file):
+            warnings.warn('show==False and out_file is not specified, only '
+                          'result image will be returned')
+            return img
diff --git a/mmseg/models/segmentors/cascade_encoder_decoder.py b/mmseg/models/segmentors/cascade_encoder_decoder.py
new file mode 100644
index 0000000000..220ab2bb36
--- /dev/null
+++ b/mmseg/models/segmentors/cascade_encoder_decoder.py
@@ -0,0 +1,98 @@
+from torch import nn
+
+from mmseg.core import add_prefix
+from mmseg.ops import resize
+from .. import builder
+from ..builder import SEGMENTORS
+from .encoder_decoder import EncoderDecoder
+
+
+@SEGMENTORS.register_module()
+class CascadeEncoderDecoder(EncoderDecoder):
+    """Cascade Encoder Decoder segmentors.
+
+    CascadeEncoderDecoder is almost the same as EncoderDecoder, except that
+    its decode heads are cascaded: the output of the previous decode_head
+    is passed, together with the features, to the next decode_head.
+    """
+
+    def __init__(self,
+                 num_stages,
+                 backbone,
+                 decode_head,
+                 neck=None,
+                 auxiliary_head=None,
+                 train_cfg=None,
+                 test_cfg=None,
+                 pretrained=None):
+        self.num_stages = num_stages  # read by _init_decode_head below
+        super(CascadeEncoderDecoder, self).__init__(
+            backbone=backbone,
+            decode_head=decode_head,
+            neck=neck,
+            auxiliary_head=auxiliary_head,
+            train_cfg=train_cfg,
+            test_cfg=test_cfg,
+            pretrained=pretrained)
+
+    def _init_decode_head(self, decode_head):
+        """Build ``num_stages`` heads from the ``decode_head`` config list."""
+        assert isinstance(decode_head, list)
+        assert len(decode_head) == self.num_stages
+        self.decode_head = nn.ModuleList()
+        for i in range(self.num_stages):
+            self.decode_head.append(builder.build_head(decode_head[i]))
+        self.align_corners = self.decode_head[-1].align_corners
+        self.num_classes = self.decode_head[-1].num_classes
+
+    def init_weights(self, pretrained=None):
+        """Initialize the weights in backbone and heads.
+
+        Args:
+            pretrained (str, optional): Path to pre-trained weights.
+                Defaults to None.
+        """
+        self.backbone.init_weights(pretrained=pretrained)
+        for i in range(self.num_stages):
+            self.decode_head[i].init_weights()
+        if self.with_auxiliary_head:
+            if isinstance(self.auxiliary_head, nn.ModuleList):
+                for aux_head in self.auxiliary_head:
+                    aux_head.init_weights()
+            else:
+                self.auxiliary_head.init_weights()
+
+    def encode_decode(self, img, img_metas):
+        """Encode images with backbone, run the cascaded heads in order and
+        resize the last head's output to the spatial size of ``img``."""
+        x = self.extract_feat(img)
+        out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg)
+        for i in range(1, self.num_stages):
+            out = self.decode_head[i].forward_test(x, out, img_metas,
+                                                   self.test_cfg)
+        out = resize(
+            input=out,
+            size=img.shape[2:],
+            mode='bilinear',
+            align_corners=self.align_corners)
+        return out
+
+    def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg):
+        """Run forward function and calculate loss for every decode head in
+        training. Losses of stage ``i`` are prefixed with ``decode_{i}``."""
+        losses = dict()
+
+        loss_decode = self.decode_head[0].forward_train(
+            x, img_metas, gt_semantic_seg, self.train_cfg)
+
+        losses.update(add_prefix(loss_decode, 'decode_0'))
+
+        for i in range(1, self.num_stages):
+            # forward test again, maybe unnecessary for most methods.
+            prev_outputs = self.decode_head[i - 1].forward_test(
+                x, img_metas, self.test_cfg)
+            loss_decode = self.decode_head[i].forward_train(
+                x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg)
+            losses.update(add_prefix(loss_decode, f'decode_{i}'))
+
+        return losses
diff --git a/mmseg/models/segmentors/encoder_decoder.py b/mmseg/models/segmentors/encoder_decoder.py
new file mode 100644
index 0000000000..d3ce17adbb
--- /dev/null
+++ b/mmseg/models/segmentors/encoder_decoder.py
@@ -0,0 +1,282 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+from mmseg.core import add_prefix
+from mmseg.ops import resize
+from .. import builder
+from ..builder import SEGMENTORS
+from .base import BaseSegmentor
+
+
+@SEGMENTORS.register_module()
+class EncoderDecoder(BaseSegmentor):
+ """Encoder Decoder segmentors.
+
+ EncoderDecoder typically consists of backbone, decode_head, auxiliary_head.
+ Note that auxiliary_head is only used for deep supervision during training,
+ which could be dumped during inference.
+ """
+
+ def __init__(self,
+ backbone,
+ decode_head,
+ neck=None,
+ auxiliary_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(EncoderDecoder, self).__init__()
+ self.backbone = builder.build_backbone(backbone)
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+ self._init_decode_head(decode_head)
+ self._init_auxiliary_head(auxiliary_head)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ self.init_weights(pretrained=pretrained)
+
+ assert self.with_decode_head
+
+ def _init_decode_head(self, decode_head):
+ """Initialize ``decode_head``"""
+ self.decode_head = builder.build_head(decode_head)
+ self.align_corners = self.decode_head.align_corners
+ self.num_classes = self.decode_head.num_classes
+
+ def _init_auxiliary_head(self, auxiliary_head):
+ """Initialize ``auxiliary_head``"""
+ if auxiliary_head is not None:
+ if isinstance(auxiliary_head, list):
+ self.auxiliary_head = nn.ModuleList()
+ for head_cfg in auxiliary_head:
+ self.auxiliary_head.append(builder.build_head(head_cfg))
+ else:
+ self.auxiliary_head = builder.build_head(auxiliary_head)
+
+ def init_weights(self, pretrained=None):
+ """Initialize the weights in backbone and heads.
+
+ Args:
+ pretrained (str, optional): Path to pre-trained weights.
+ Defaults to None.
+ """
+
+ super(EncoderDecoder, self).init_weights(pretrained)
+ self.backbone.init_weights(pretrained=pretrained)
+ self.decode_head.init_weights()
+ if self.with_auxiliary_head:
+ if isinstance(self.auxiliary_head, nn.ModuleList):
+ for aux_head in self.auxiliary_head:
+ aux_head.init_weights()
+ else:
+ self.auxiliary_head.init_weights()
+
+ def extract_feat(self, img):
+ """Extract features from images."""
+ x = self.backbone(img)
+ if self.with_neck:
+ x = self.neck(x)
+ return x
+
+ def encode_decode(self, img, img_metas):
+ """Encode images with backbone and decode into a semantic segmentation
+ map of the same size as input."""
+ x = self.extract_feat(img)
+ out = self._decode_head_forward_test(x, img_metas)
+ out = resize(
+ input=out,
+ size=img.shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners)
+ return out
+
+ def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg):
+ """Run forward function and calculate loss for decode head in
+ training."""
+ losses = dict()
+ loss_decode = self.decode_head.forward_train(x, img_metas,
+ gt_semantic_seg,
+ self.train_cfg)
+
+ losses.update(add_prefix(loss_decode, 'decode'))
+ return losses
+
+ def _decode_head_forward_test(self, x, img_metas):
+ """Run forward function and calculate loss for decode head in
+ inference."""
+ seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg)
+ return seg_logits
+
+ def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg):
+ """Run forward function and calculate loss for auxiliary head in
+ training."""
+ losses = dict()
+ if isinstance(self.auxiliary_head, nn.ModuleList):
+ for idx, aux_head in enumerate(self.auxiliary_head):
+ loss_aux = aux_head.forward_train(x, img_metas,
+ gt_semantic_seg,
+ self.train_cfg)
+ losses.update(add_prefix(loss_aux, f'aux_{idx}'))
+ else:
+ loss_aux = self.auxiliary_head.forward_train(
+ x, img_metas, gt_semantic_seg, self.train_cfg)
+ losses.update(add_prefix(loss_aux, 'aux'))
+
+ return losses
+
+ def forward_dummy(self, img):
+ """Dummy forward function."""
+ seg_logit = self.encode_decode(img, None)
+
+ return seg_logit
+
+ def forward_train(self, img, img_metas, gt_semantic_seg):
+ """Forward function for training.
+
+ Args:
+ img (Tensor): Input images.
+ img_metas (list[dict]): List of image info dict where each dict
+ has: 'img_shape', 'scale_factor', 'flip', and may also contain
+ 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+ For details on the values of these keys see
+ `mmseg/datasets/pipelines/formatting.py:Collect`.
+ gt_semantic_seg (Tensor): Semantic segmentation masks
+ used if the architecture supports semantic segmentation task.
+
+ Returns:
+ dict[str, Tensor]: a dictionary of loss components
+ """
+
+ x = self.extract_feat(img)
+
+ losses = dict()
+
+ loss_decode = self._decode_head_forward_train(x, img_metas,
+ gt_semantic_seg)
+ losses.update(loss_decode)
+
+ if self.with_auxiliary_head:
+ loss_aux = self._auxiliary_head_forward_train(
+ x, img_metas, gt_semantic_seg)
+ losses.update(loss_aux)
+
+ return losses
+
+ # TODO refactor
+ def slide_inference(self, img, img_meta, rescale):
+ """Inference by sliding-window with overlap."""
+
+ h_stride, w_stride = self.test_cfg.stride
+ h_crop, w_crop = self.test_cfg.crop_size
+ batch_size, _, h_img, w_img = img.size()
+ num_classes = self.num_classes
+ h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
+ w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1
+ preds = img.new_zeros((batch_size, num_classes, h_img, w_img))
+ count_mat = img.new_zeros((batch_size, 1, h_img, w_img))
+ for h_idx in range(h_grids):
+ for w_idx in range(w_grids):
+ y1 = h_idx * h_stride
+ x1 = w_idx * w_stride
+ y2 = min(y1 + h_crop, h_img)
+ x2 = min(x1 + w_crop, w_img)
+ y1 = max(y2 - h_crop, 0)
+ x1 = max(x2 - w_crop, 0)
+ crop_img = img[:, :, y1:y2, x1:x2]
+ pad_img = crop_img.new_zeros(
+ (crop_img.size(0), crop_img.size(1), h_crop, w_crop))
+ pad_img[:, :, :y2 - y1, :x2 - x1] = crop_img
+ pad_seg_logit = self.encode_decode(pad_img, img_meta)
+ preds[:, :, y1:y2,
+ x1:x2] += pad_seg_logit[:, :, :y2 - y1, :x2 - x1]
+ count_mat[:, :, y1:y2, x1:x2] += 1
+ assert (count_mat == 0).sum() == 0
+ preds = preds / count_mat
+ if rescale:
+ preds = resize(
+ preds,
+ size=img_meta[0]['ori_shape'][:2],
+ mode='bilinear',
+ align_corners=self.align_corners,
+ warning=False)
+
+ return preds
+
+ def whole_inference(self, img, img_meta, rescale):
+ """Inference with full image."""
+
+ seg_logit = self.encode_decode(img, img_meta)
+ if rescale:
+ seg_logit = resize(
+ seg_logit,
+ size=img_meta[0]['ori_shape'][:2],
+ mode='bilinear',
+ align_corners=self.align_corners,
+ warning=False)
+
+ return seg_logit
+
+ def inference(self, img, img_meta, rescale):
+ """Inference with slide/whole style.
+
+ Args:
+ img (Tensor): The input image of shape (N, 3, H, W).
+ img_meta (dict): Image info dict where each dict has: 'img_shape',
+ 'scale_factor', 'flip', and may also contain
+ 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+ For details on the values of these keys see
+ `mmseg/datasets/pipelines/formatting.py:Collect`.
+ rescale (bool): Whether rescale back to original shape.
+
+ Returns:
+ Tensor: The output segmentation map.
+ """
+
+ assert self.test_cfg.mode in ['slide', 'whole']
+ ori_shape = img_meta[0]['ori_shape']
+ assert all(_['ori_shape'] == ori_shape for _ in img_meta)
+ if self.test_cfg.mode == 'slide':
+ seg_logit = self.slide_inference(img, img_meta, rescale)
+ else:
+ seg_logit = self.whole_inference(img, img_meta, rescale)
+ output = F.softmax(seg_logit, dim=1)
+ flip = img_meta[0]['flip']
+ flip_direction = img_meta[0]['flip_direction']
+ if flip:
+ assert flip_direction in ['horizontal', 'vertical']
+ if flip_direction == 'horizontal':
+ output = output.flip(dims=(3, ))
+ elif flip_direction == 'vertical':
+ output = output.flip(dims=(2, ))
+
+ return output
+
+ def simple_test(self, img, img_meta, rescale=True):
+ """Simple test with single image."""
+ seg_logit = self.inference(img, img_meta, rescale)
+ seg_pred = seg_logit.argmax(dim=1)
+ seg_pred = seg_pred.cpu().numpy()
+ # unravel batch dim
+ seg_pred = list(seg_pred)
+ return seg_pred
+
+ def aug_test(self, imgs, img_metas, rescale=True):
+ """Test with augmentations.
+
+ Only rescale=True is supported.
+ """
+ # aug_test rescale all imgs back to ori_shape for now
+ assert rescale
+ # to save memory, we get augmented seg logit inplace
+ seg_logit = self.inference(imgs[0], img_metas[0], rescale)
+ for i in range(1, len(imgs)):
+ cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale)
+ seg_logit += cur_seg_logit
+ seg_logit /= len(imgs)
+ seg_pred = seg_logit.argmax(dim=1)
+ seg_pred = seg_pred.cpu().numpy()
+ # unravel batch dim
+ seg_pred = list(seg_pred)
+ return seg_pred
diff --git a/mmseg/models/utils/__init__.py b/mmseg/models/utils/__init__.py
new file mode 100644
index 0000000000..71d3f423ce
--- /dev/null
+++ b/mmseg/models/utils/__init__.py
@@ -0,0 +1,4 @@
+from .res_layer import ResLayer
+from .self_attention_block import SelfAttentionBlock
+
+__all__ = ['ResLayer', 'SelfAttentionBlock']
diff --git a/mmseg/models/utils/res_layer.py b/mmseg/models/utils/res_layer.py
new file mode 100644
index 0000000000..9ef51b95b0
--- /dev/null
+++ b/mmseg/models/utils/res_layer.py
@@ -0,0 +1,95 @@
+from mmcv.cnn import build_conv_layer, build_norm_layer
+from torch import nn as nn
+
+
+class ResLayer(nn.Sequential):
+    """ResLayer to build ResNet style backbone.
+
+    Args:
+        block (nn.Module): block used to build ResLayer.
+        inplanes (int): inplanes of block.
+        planes (int): planes of block.
+        num_blocks (int): number of blocks.
+        stride (int): stride of the first block. Default: 1
+        dilation (int): dilation passed to the blocks. Default: 1
+        avg_down (bool): Use AvgPool instead of stride conv when
+            downsampling in the bottleneck. Default: False
+        conv_cfg (dict): Config dict for the conv layer. Default: None
+        norm_cfg (dict): dictionary to construct and config norm layer.
+            Default: dict(type='BN')
+        multi_grid (Sequence[int] | None): Per-block dilations used instead
+            of `dilation` when given. Default: None
+        contract_dilation (bool): Whether to halve the first block's dilation
+            (only if dilation > 1 and multi_grid is None). Default: False
+    """
+
+    def __init__(self,
+                 block,
+                 inplanes,
+                 planes,
+                 num_blocks,
+                 stride=1,
+                 dilation=1,
+                 avg_down=False,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 multi_grid=None,
+                 contract_dilation=False,
+                 **kwargs):
+        self.block = block
+
+        downsample = None
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = []
+            conv_stride = stride
+            # check dilation for dilated ResNet
+            if avg_down and (stride != 1 or dilation != 1):
+                conv_stride = 1  # the AvgPool2d below does the striding
+                downsample.append(
+                    nn.AvgPool2d(
+                        kernel_size=stride,
+                        stride=stride,
+                        ceil_mode=True,
+                        count_include_pad=False))
+            downsample.extend([
+                build_conv_layer(
+                    conv_cfg,
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=conv_stride,
+                    bias=False),
+                build_norm_layer(norm_cfg, planes * block.expansion)[1]
+            ])
+            downsample = nn.Sequential(*downsample)
+
+        layers = []
+        if multi_grid is None:
+            if dilation > 1 and contract_dilation:
+                first_dilation = dilation // 2
+            else:
+                first_dilation = dilation
+        else:
+            first_dilation = multi_grid[0]
+        layers.append(
+            block(
+                inplanes=inplanes,
+                planes=planes,
+                stride=stride,
+                dilation=first_dilation,
+                downsample=downsample,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                **kwargs))
+        inplanes = planes * block.expansion
+        for i in range(1, num_blocks):
+            layers.append(
+                block(
+                    inplanes=inplanes,
+                    planes=planes,
+                    stride=1,
+                    dilation=dilation if multi_grid is None else multi_grid[i],
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    **kwargs))
+        super(ResLayer, self).__init__(*layers)
diff --git a/mmseg/models/utils/self_attention_block.py b/mmseg/models/utils/self_attention_block.py
new file mode 100644
index 0000000000..372fad2e00
--- /dev/null
+++ b/mmseg/models/utils/self_attention_block.py
@@ -0,0 +1,159 @@
+import torch
+from mmcv.cnn import ConvModule, constant_init
+from torch import nn as nn
+from torch.nn import functional as F
+
+
+class SelfAttentionBlock(nn.Module):
+    """General self-attention block/non-local block.
+
+    Please refer to https://arxiv.org/abs/1706.03762 for details about key,
+    query and value.
+
+    Args:
+        key_in_channels (int): Input channels of key feature.
+        query_in_channels (int): Input channels of query feature.
+        channels (int): Output channels of key/query transform.
+        out_channels (int): Output channels.
+        share_key_query (bool): Whether key and query share one projection.
+        query_downsample (nn.Module | None): Query downsample module.
+        key_downsample (nn.Module | None): Downsample module for key and value.
+        key_query_num_convs (int): Number of convs for key/query projection.
+        value_out_num_convs (int): Number of convs for value/out projection.
+        key_query_norm (bool): Whether key/query projection uses ConvModule.
+        value_out_norm (bool): Whether value/out projection uses ConvModule.
+        matmul_norm (bool): Whether to scale similarity map by channels**-0.5.
+        with_out (bool): Whether use out projection.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict|None): Config of activation layers.
+    """
+
+    def __init__(self, key_in_channels, query_in_channels, channels,
+                 out_channels, share_key_query, query_downsample,
+                 key_downsample, key_query_num_convs, value_out_num_convs,
+                 key_query_norm, value_out_norm, matmul_norm, with_out,
+                 conv_cfg, norm_cfg, act_cfg):
+        super(SelfAttentionBlock, self).__init__()
+        if share_key_query:
+            assert key_in_channels == query_in_channels
+        self.key_in_channels = key_in_channels
+        self.query_in_channels = query_in_channels
+        self.out_channels = out_channels
+        self.channels = channels
+        self.share_key_query = share_key_query
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.key_project = self.build_project(
+            key_in_channels,
+            channels,
+            num_convs=key_query_num_convs,
+            use_conv_module=key_query_norm,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        if share_key_query:
+            self.query_project = self.key_project
+        else:
+            self.query_project = self.build_project(
+                query_in_channels,
+                channels,
+                num_convs=key_query_num_convs,
+                use_conv_module=key_query_norm,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg)
+        self.value_project = self.build_project(
+            key_in_channels,
+            channels if with_out else out_channels,
+            num_convs=value_out_num_convs,
+            use_conv_module=value_out_norm,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        if with_out:
+            self.out_project = self.build_project(
+                channels,
+                out_channels,
+                num_convs=value_out_num_convs,
+                use_conv_module=value_out_norm,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg)
+        else:
+            self.out_project = None
+
+        self.query_downsample = query_downsample
+        self.key_downsample = key_downsample
+        self.matmul_norm = matmul_norm
+
+        self.init_weights()
+
+    def init_weights(self):
+        """Zero-init ``out_project`` unless it is a ConvModule (or None)."""
+        if self.out_project is not None:
+            if not isinstance(self.out_project, ConvModule):
+                constant_init(self.out_project, 0)
+
+    def build_project(self, in_channels, channels, num_convs, use_conv_module,
+                      conv_cfg, norm_cfg, act_cfg):
+        """Build projection layer for key/query/value/out."""
+        if use_conv_module:
+            convs = [
+                ConvModule(
+                    in_channels,
+                    channels,
+                    1,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg)
+            ]
+            for _ in range(num_convs - 1):
+                convs.append(
+                    ConvModule(
+                        channels,
+                        channels,
+                        1,
+                        conv_cfg=conv_cfg,
+                        norm_cfg=norm_cfg,
+                        act_cfg=act_cfg))
+        else:
+            convs = [nn.Conv2d(in_channels, channels, 1)]
+            for _ in range(num_convs - 1):
+                convs.append(nn.Conv2d(channels, channels, 1))
+        if len(convs) > 1:
+            convs = nn.Sequential(*convs)
+        else:
+            convs = convs[0]  # a single conv is returned unwrapped
+        return convs
+
+    def forward(self, query_feats, key_feats):
+        """Forward function."""
+        batch_size = query_feats.size(0)
+        query = self.query_project(query_feats)
+        if self.query_downsample is not None:
+            query = self.query_downsample(query)
+        query = query.reshape(*query.shape[:2], -1)
+        query = query.permute(0, 2, 1).contiguous()
+
+        key = self.key_project(key_feats)
+        value = self.value_project(key_feats)
+        if self.key_downsample is not None:
+            key = self.key_downsample(key)
+            value = self.key_downsample(value)
+        key = key.reshape(*key.shape[:2], -1)
+        value = value.reshape(*value.shape[:2], -1)
+        value = value.permute(0, 2, 1).contiguous()
+
+        sim_map = torch.matmul(query, key)
+        if self.matmul_norm:
+            sim_map = (self.channels**-.5) * sim_map
+        sim_map = F.softmax(sim_map, dim=-1)
+
+        context = torch.matmul(sim_map, value)
+        context = context.permute(0, 2, 1).contiguous()
+        context = context.reshape(batch_size, -1, *query_feats.shape[2:])
+        if self.out_project is not None:
+            context = self.out_project(context)
+        return context
diff --git a/mmseg/ops/__init__.py b/mmseg/ops/__init__.py
new file mode 100644
index 0000000000..54b0d0b79c
--- /dev/null
+++ b/mmseg/ops/__init__.py
@@ -0,0 +1,5 @@
+from .encoding import Encoding
+from .separable_conv_module import DepthwiseSeparableConvModule
+from .wrappers import resize
+
+__all__ = ['resize', 'DepthwiseSeparableConvModule', 'Encoding']
diff --git a/mmseg/ops/encoding.py b/mmseg/ops/encoding.py
new file mode 100644
index 0000000000..d939189657
--- /dev/null
+++ b/mmseg/ops/encoding.py
@@ -0,0 +1,74 @@
+import torch
+from torch import nn as nn
+from torch.nn import functional as F
+
+
+class Encoding(nn.Module):
+    """Encoding Layer: a learnable residual encoder.
+
+    Input is of shape (batch_size, channels, height, width).
+    Output is of shape (batch_size, num_codes, channels).
+
+    Args:
+        channels (int): dimension of the features or feature channels
+        num_codes (int): number of code words
+    """
+
+    def __init__(self, channels, num_codes):
+        super(Encoding, self).__init__()
+        # init codewords and smoothing factor
+        self.channels, self.num_codes = channels, num_codes
+        std = 1. / ((num_codes * channels)**0.5)
+        # [num_codes, channels]
+        self.codewords = nn.Parameter(
+            torch.empty(num_codes, channels,
+                        dtype=torch.float).uniform_(-std, std),
+            requires_grad=True)
+        # [num_codes]
+        self.scale = nn.Parameter(
+            torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0),
+            requires_grad=True)
+
+    @staticmethod
+    def scaled_l2(x, codewords, scale):
+        num_codes, channels = codewords.size()
+        batch_size = x.size(0)
+        reshaped_scale = scale.view((1, 1, num_codes))
+        expanded_x = x.unsqueeze(2).expand(
+            (batch_size, x.size(1), num_codes, channels))
+        reshaped_codewords = codewords.view((1, 1, num_codes, channels))
+
+        scaled_l2_norm = reshaped_scale * (
+            expanded_x - reshaped_codewords).pow(2).sum(dim=3)
+        return scaled_l2_norm
+
+    @staticmethod
+    def aggregate(assigment_weights, x, codewords):
+        num_codes, channels = codewords.size()
+        reshaped_codewords = codewords.view((1, 1, num_codes, channels))
+        batch_size = x.size(0)
+
+        expanded_x = x.unsqueeze(2).expand(
+            (batch_size, x.size(1), num_codes, channels))
+        encoded_feat = (assigment_weights.unsqueeze(3) *
+                        (expanded_x - reshaped_codewords)).sum(dim=1)
+        return encoded_feat
+
+    def forward(self, x):
+        assert x.dim() == 4 and x.size(1) == self.channels
+        # [batch_size, channels, height, width]
+        batch_size = x.size(0)
+        # [batch_size, height x width, channels]
+        x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous()
+        # assignment_weights: [batch_size, height x width, num_codes]
+        assigment_weights = F.softmax(
+            self.scaled_l2(x, self.codewords, self.scale), dim=2)
+        # aggregate: [batch_size, num_codes, channels]
+        encoded_feat = self.aggregate(assigment_weights, x, self.codewords)
+        return encoded_feat
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \
+                    f'x{self.channels})'
+        return repr_str
diff --git a/mmseg/ops/separable_conv_module.py b/mmseg/ops/separable_conv_module.py
new file mode 100644
index 0000000000..4e5922cc4d
--- /dev/null
+++ b/mmseg/ops/separable_conv_module.py
@@ -0,0 +1,88 @@
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+
+class DepthwiseSeparableConvModule(nn.Module):
+    """Depthwise separable convolution module.
+
+    See https://arxiv.org/pdf/1704.04861.pdf for details.
+
+    This module can replace a ConvModule with the conv block replaced by two
+    conv blocks: depthwise conv block and pointwise conv block. The depthwise
+    conv block contains depthwise-conv/norm/activation layers. The pointwise
+    conv block contains pointwise-conv/norm/activation layers. It should be
+    noted that there will be norm/activation layer in the depthwise conv block
+    if `norm_cfg` and `act_cfg` are specified.
+
+    Args:
+        in_channels (int): Same as nn.Conv2d.
+        out_channels (int): Same as nn.Conv2d.
+        kernel_size (int or tuple[int]): Same as nn.Conv2d.
+        stride (int or tuple[int]): Same as nn.Conv2d. Default: 1.
+        padding (int or tuple[int]): Same as nn.Conv2d. Default: 0.
+        dilation (int or tuple[int]): Same as nn.Conv2d. Default: 1.
+        norm_cfg (dict): Default norm config for both depthwise ConvModule and
+            pointwise ConvModule. Default: None.
+        act_cfg (dict): Default activation config for both depthwise ConvModule
+            and pointwise ConvModule. Default: dict(type='ReLU').
+        dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is
+            'default', it will be the same as `norm_cfg`. Default: 'default'.
+        dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is
+            'default', it will be the same as `act_cfg`. Default: 'default'.
+        pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is
+            'default', it will be the same as `norm_cfg`. Default: 'default'.
+        pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is
+            'default', it will be the same as `act_cfg`. Default: 'default'.
+        kwargs (optional): Other shared arguments for depthwise and pointwise
+            ConvModule. `groups` must not be given. See ConvModule for ref.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 norm_cfg=None,
+                 act_cfg=dict(type='ReLU'),
+                 dw_norm_cfg='default',
+                 dw_act_cfg='default',
+                 pw_norm_cfg='default',
+                 pw_act_cfg='default',
+                 **kwargs):
+        super(DepthwiseSeparableConvModule, self).__init__()
+        assert 'groups' not in kwargs, 'groups should not be specified'
+
+        # if norm/activation config of depthwise/pointwise ConvModule is not
+        # specified, use default config.
+        dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg
+        dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg
+        pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg
+        pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg
+
+        # depthwise convolution: one group per input channel
+        self.depthwise_conv = ConvModule(
+            in_channels,
+            in_channels,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=in_channels,
+            norm_cfg=dw_norm_cfg,
+            act_cfg=dw_act_cfg,
+            **kwargs)
+
+        self.pointwise_conv = ConvModule(
+            in_channels,
+            out_channels,
+            1,  # pointwise: kernel size 1
+            norm_cfg=pw_norm_cfg,
+            act_cfg=pw_act_cfg,
+            **kwargs)
+
+    def forward(self, x):
+        x = self.depthwise_conv(x)
+        x = self.pointwise_conv(x)
+        return x
diff --git a/mmseg/ops/wrappers.py b/mmseg/ops/wrappers.py
new file mode 100644
index 0000000000..0b319767f5
--- /dev/null
+++ b/mmseg/ops/wrappers.py
@@ -0,0 +1,25 @@
+import warnings
+
+import torch.nn.functional as F
+
+
+def resize(input,
+ size=None,
+ scale_factor=None,
+ mode='nearest',
+ align_corners=None,
+ warning=True):
+ if warning:
+ if size is not None and align_corners:
+ input_h, input_w = input.shape[2:]
+ output_h, output_w = size
+ if output_h > input_h or output_w > output_h:
+ if ((output_h > 1 and output_w > 1 and input_h > 1
+ and input_w > 1) and (output_h - 1) % (input_h - 1)
+ and (output_w - 1) % (input_w - 1)):
+ warnings.warn(
+ f'When align_corners={align_corners}, '
+ 'the output would more aligned if '
+ f'input size {(input_h, input_w)} is `x+1` and '
+ f'out size {(output_h, output_w)} is `nx+1`')
+ return F.interpolate(input, size, scale_factor, mode, align_corners)
diff --git a/mmseg/utils/__init__.py b/mmseg/utils/__init__.py
new file mode 100644
index 0000000000..e7d28670e0
--- /dev/null
+++ b/mmseg/utils/__init__.py
@@ -0,0 +1,7 @@
+from .collect_env import collect_env
+from .logger import get_root_logger
+
+__all__ = [
+ 'get_root_logger',
+ 'collect_env',
+]
diff --git a/mmseg/utils/collect_env.py b/mmseg/utils/collect_env.py
new file mode 100644
index 0000000000..8b82019668
--- /dev/null
+++ b/mmseg/utils/collect_env.py
@@ -0,0 +1,70 @@
+import os.path as osp
+import subprocess
+import sys
+from collections import defaultdict
+
+import cv2
+import mmcv
+import torch
+import torchvision
+from mmcv.utils.parrots_wrapper import get_build_config
+
+import mmseg
+
+
+def collect_env():
+ """Collect the information of the running environments."""
+ env_info = {}
+ env_info['sys.platform'] = sys.platform
+ env_info['Python'] = sys.version.replace('\n', '')
+
+ cuda_available = torch.cuda.is_available()
+ env_info['CUDA available'] = cuda_available
+
+ if cuda_available:
+ from mmcv.utils.parrots_wrapper import CUDA_HOME
+ env_info['CUDA_HOME'] = CUDA_HOME
+
+ if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
+ try:
+ nvcc = osp.join(CUDA_HOME, 'bin/nvcc')
+ nvcc = subprocess.check_output(
+ '"{}" -V | tail -n1'.format(nvcc), shell=True)
+ nvcc = nvcc.decode('utf-8').strip()
+ except subprocess.SubprocessError:
+ nvcc = 'Not Available'
+ env_info['NVCC'] = nvcc
+
+ devices = defaultdict(list)
+ for k in range(torch.cuda.device_count()):
+ devices[torch.cuda.get_device_name(k)].append(str(k))
+ for name, devids in devices.items():
+ env_info['GPU ' + ','.join(devids)] = name
+
+ gcc = subprocess.check_output('gcc --version | head -n1', shell=True)
+ gcc = gcc.decode('utf-8').strip()
+ env_info['GCC'] = gcc
+
+ env_info['PyTorch'] = torch.__version__
+ env_info['PyTorch compiling details'] = get_build_config()
+
+ env_info['TorchVision'] = torchvision.__version__
+
+ env_info['OpenCV'] = cv2.__version__
+
+ env_info['MMCV'] = mmcv.__version__
+ env_info['MMSegmentation'] = mmseg.__version__
+ try:
+ from mmcv.ops import get_compiler_version, get_compiling_cuda_version
+ env_info['MMCV Compiler'] = get_compiler_version()
+ env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version()
+ except ImportError:
+ env_info['MMCV Compiler'] = 'n/a'
+ env_info['MMCV CUDA Compiler'] = 'n/a'
+
+ return env_info
+
+
+if __name__ == '__main__':  # run as a script: print each collected entry
+    for name, val in collect_env().items():
+        print('{}: {}'.format(name, val))
diff --git a/mmseg/utils/logger.py b/mmseg/utils/logger.py
new file mode 100644
index 0000000000..05d2f13439
--- /dev/null
+++ b/mmseg/utils/logger.py
@@ -0,0 +1,27 @@
+import logging
+
+from mmcv.utils import get_logger
+
+
+def get_root_logger(log_file=None, log_level=logging.INFO):
+    """Get the root logger.
+
+    The logger will be initialized if it has not been initialized. By default a
+    StreamHandler will be added. If `log_file` is specified, a FileHandler will
+    also be added. The name of the root logger is the top-level package name,
+    e.g., "mmseg".
+
+    Args:
+        log_file (str | None): The log filename. If specified, a FileHandler
+            will be added to the root logger.
+        log_level (int): The root logger level. Note that only the process of
+            rank 0 is affected, while other processes will set the level to
+            "Error" and be silent most of the time.
+
+    Returns:
+        logging.Logger: The root logger.
+    """
+    # thin wrapper: delegate to mmcv's get_logger with the fixed name 'mmseg'
+    logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level)
+
+    return logger
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000000..9796e871e7
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,7 @@
+[pytest]
+addopts = --xdoctest --xdoctest-style=auto
+norecursedirs = .git ignore build __pycache__ data docker docs .eggs
+
+filterwarnings= default
+ ignore:.*No cfgstr given in Cacher constructor or call.*:Warning
+ ignore:.*Define the __nice__ method for.*:Warning
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000..6981bd7233
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+-r requirements/build.txt
+-r requirements/optional.txt
+-r requirements/runtime.txt
+-r requirements/tests.txt
diff --git a/requirements/build.txt b/requirements/build.txt
new file mode 100644
index 0000000000..2f74f3d17f
--- /dev/null
+++ b/requirements/build.txt
@@ -0,0 +1,3 @@
+# These must be installed before building mmsegmentation
+numpy
+# torch
diff --git a/requirements/optional.txt b/requirements/optional.txt
new file mode 100644
index 0000000000..47fa593315
--- /dev/null
+++ b/requirements/optional.txt
@@ -0,0 +1 @@
+cityscapesscripts
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
new file mode 100644
index 0000000000..a03605f66a
--- /dev/null
+++ b/requirements/runtime.txt
@@ -0,0 +1,4 @@
+matplotlib
+numpy
+# torch
+# torchvision
diff --git a/requirements/tests.txt b/requirements/tests.txt
new file mode 100644
index 0000000000..400f79cd26
--- /dev/null
+++ b/requirements/tests.txt
@@ -0,0 +1,8 @@
+asynctest
+codecov
+flake8
+interrogate
+isort==4.3.21
+pytest
+xdoctest>=0.10.0
+yapf
diff --git a/resources/mmseg-logo.png b/resources/mmseg-logo.png
new file mode 100644
index 0000000000..009083a9e8
Binary files /dev/null and b/resources/mmseg-logo.png differ
diff --git a/resources/seg_demo.gif b/resources/seg_demo.gif
new file mode 100644
index 0000000000..2f0760fe7a
Binary files /dev/null and b/resources/seg_demo.gif differ
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000000..2102a8ca60
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,13 @@
+[yapf]
+based_on_style = pep8
+blank_line_before_nested_class_or_def = true
+split_before_expression_after_opening_paren = true
+
+[isort]
+line_length = 79
+multi_line_output = 0
+known_standard_library = setuptools
+known_first_party = mmseg
+known_third_party = PIL,cityscapesscripts,cv2,matplotlib,mmcv,numpy,pytablewriter,pytest,scipy,torch,torchvision
+no_lines_before = STDLIB,LOCALFOLDER
+default_section = THIRDPARTY
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000000..af05f95548
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+import os
+import subprocess
+import time
+from setuptools import find_packages, setup
+
+import torch
+from mmcv.utils.parrots_wrapper import (BuildExtension, CppExtension,
+ CUDAExtension)
+
+
+def readme():
+    """Return the complete text of ``README.md``, decoded as UTF-8."""
+    with open('README.md', encoding='utf-8') as readme_file:
+        return readme_file.read()
+
+
+# Path of the generated version module: written by write_version_py() and
+# read back by get_version().
+version_file = 'mmseg/version.py'
+
+
+def get_git_hash():
+    """Return the full SHA of the current git ``HEAD``.
+
+    Returns:
+        str: The hash printed by ``git rev-parse HEAD``, or ``'unknown'``
+            when git cannot be executed or exits with an error (for example
+            when the working directory is not a usable repository).
+    """
+
+    def _minimal_ext_cmd(cmd):
+        # construct minimal environment
+        env = {}
+        for k in ['SYSTEMROOT', 'PATH', 'HOME']:
+            v = os.environ.get(k)
+            if v is not None:
+                env[k] = v
+        # LANGUAGE is used on win32
+        env['LANGUAGE'] = 'C'
+        env['LANG'] = 'C'
+        env['LC_ALL'] = 'C'
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env)
+        out = proc.communicate()[0]
+        if proc.returncode != 0:
+            # A failed git call leaves stdout empty; surface it as an error
+            # so an empty hash never leaks into the version string.
+            raise OSError('{!r} exited with status {}'.format(
+                cmd, proc.returncode))
+        return out
+
+    try:
+        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
+        sha = out.strip().decode('ascii')
+    except OSError:
+        sha = 'unknown'
+
+    return sha
+
+
+def get_hash():
+    """Return a short identifier of the source revision.
+
+    Inside a git checkout this is the first 7 characters of the ``HEAD``
+    hash. Otherwise, if the generated version module exists, it is the part
+    of ``mmseg.version.__version__`` after the last ``+``. If neither is
+    available the result is ``'unknown'``.
+
+    Raises:
+        ImportError: If the version module exists but cannot be imported.
+    """
+    if os.path.exists('.git'):
+        return get_git_hash()[:7]
+    if not os.path.exists(version_file):
+        return 'unknown'
+    try:
+        from mmseg.version import __version__
+    except ImportError:
+        raise ImportError('Unable to get git version')
+    return __version__.split('+')[-1]
+
+
+def write_version_py():
+    """Generate the version module at ``version_file``.
+
+    The short version is read from ``mmseg/VERSION``; the full version is
+    that string followed by ``+`` and the result of ``get_hash()``.
+    """
+    template = """# GENERATED VERSION FILE
+# TIME: {}
+
+__version__ = '{}'
+short_version = '{}'
+version_info = ({})
+"""
+    revision = get_hash()
+    with open('mmseg/VERSION', 'r') as src:
+        short_version = src.read().strip()
+    # '0.5.0' -> '0, 5, 0', rendered inside the tuple literal of the template
+    info_fields = ', '.join(short_version.split('.'))
+    full_version = short_version + '+' + revision
+
+    rendered = template.format(time.asctime(), full_version, short_version,
+                               info_fields)
+    with open(version_file, 'w') as dst:
+        dst.write(rendered)
+
+
+def get_version():
+    """Return ``__version__`` as defined by the generated version module.
+
+    The file at ``version_file`` is executed in a private namespace rather
+    than in this function's implicit locals: names created by ``exec()``
+    are not guaranteed to be visible through a later ``locals()`` call.
+
+    Returns:
+        str: The value assigned to ``__version__`` in the version file.
+
+    Raises:
+        KeyError: If the version file does not define ``__version__``.
+    """
+    namespace = {}
+    with open(version_file, 'r') as f:
+        exec(compile(f.read(), version_file, 'exec'), namespace)
+    return namespace['__version__']
+
+
+def make_cuda_ext(name, module, sources, sources_cuda=None):
+    """Build the extension description for one C++/CUDA op.
+
+    Args:
+        name (str): Name of the extension inside ``module``.
+        module (str): Dotted package path that contains the sources.
+        sources (list[str]): Source files, relative to the package
+            directory. The list is not modified.
+        sources_cuda (list[str] | None): Extra sources that are only
+            compiled in a CUDA build.
+
+    Returns:
+        A ``CUDAExtension`` when CUDA is available or ``FORCE_CUDA=1`` is
+        set in the environment, otherwise a ``CppExtension``.
+    """
+
+    define_macros = []
+    extra_compile_args = {'cxx': []}
+    # Work on a copy so the caller's list is never extended in place.
+    all_sources = list(sources)
+
+    if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
+        define_macros += [('WITH_CUDA', None)]
+        extension = CUDAExtension
+        extra_compile_args['nvcc'] = [
+            '-D__CUDA_NO_HALF_OPERATORS__',
+            '-D__CUDA_NO_HALF_CONVERSIONS__',
+            '-D__CUDA_NO_HALF2_OPERATORS__',
+        ]
+        if sources_cuda:
+            all_sources.extend(sources_cuda)
+    else:
+        print('Compiling {} without CUDA'.format(name))
+        extension = CppExtension
+
+    return extension(
+        name='{}.{}'.format(module, name),
+        sources=[os.path.join(*module.split('.'), p) for p in all_sources],
+        define_macros=define_macros,
+        extra_compile_args=extra_compile_args)
+
+
+def parse_requirements(fname='requirements.txt', with_version=True):
+    """Parse the package dependencies listed in a requirements file.
+
+    Blank lines and ``#`` comment lines are skipped and ``-r other.txt``
+    includes are followed. Version specifiers are kept or dropped according
+    to ``with_version``; environment markers (the part after ``;``) are kept.
+
+    Args:
+        fname (str): path to requirements file
+        with_version (bool, default=True): if True include version specs
+
+    Returns:
+        List[str]: list of requirements items; empty if ``fname`` does not
+            exist
+
+    CommandLine:
+        python -c "import setup; print(setup.parse_requirements())"
+    """
+    import re
+    import sys
+    from os.path import exists
+    require_fpath = fname
+
+    # Two-character operators come first so that '>=' is not split as '>'.
+    version_pat = '(' + '|'.join(
+        ['>=', '<=', '==', '~=', '!=', '>', '<']) + ')'
+
+    def parse_line(line):
+        """Parse information from a line in a requirements text file."""
+        if line.startswith('-r '):
+            # Allow specifying requirements in other files
+            target = line.split(None, 1)[1]
+            for info in parse_require_file(target):
+                yield info
+        else:
+            info = {'line': line}
+            if line.startswith('-e '):
+                info['package'] = line.split('#egg=')[1]
+            else:
+                # Separate the environment marker first so that comparison
+                # operators inside it are never taken for the version spec.
+                # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
+                spec, _, platform_deps = line.partition(';')
+                platform_deps = platform_deps.strip()
+
+                # Remove versioning from the package
+                parts = re.split(version_pat, spec, maxsplit=1)
+                parts = [p.strip() for p in parts]
+
+                info['package'] = parts[0]
+                if len(parts) > 1:
+                    op, version = parts[1:]
+                    info['version'] = (op, version)
+                if platform_deps:
+                    info['platform_deps'] = platform_deps
+            yield info
+
+    def parse_require_file(fpath):
+        with open(fpath, 'r') as f:
+            for line in f:
+                line = line.strip()
+                if line and not line.startswith('#'):
+                    for info in parse_line(line):
+                        yield info
+
+    def gen_packages_items():
+        if exists(require_fpath):
+            for info in parse_require_file(require_fpath):
+                parts = [info['package']]
+                if with_version and 'version' in info:
+                    parts.extend(info['version'])
+                if not sys.version.startswith('3.4'):
+                    # apparently package_deps are broken in 3.4
+                    platform_deps = info.get('platform_deps')
+                    if platform_deps is not None:
+                        parts.append(';' + platform_deps)
+                item = ''.join(parts)
+                yield item
+
+    packages = list(gen_packages_items())
+    return packages
+
+
+if __name__ == '__main__':
+    # Regenerate the version module first; get_version() below reads it back.
+    write_version_py()
+    setup(
+        name='mmseg',
+        version=get_version(),
+        description='Open MMLab Semantic Segmentation Toolbox and Benchmark',
+        long_description=readme(),
+        author='MMSegmentation Authors',
+        author_email='openmmlab@gmail.com',
+        keywords='computer vision, semantic segmentation',
+        url='http://github.com/open-mmlab/mmsegmentation',
+        packages=find_packages(exclude=('configs', 'tools', 'demo')),
+        package_data={'mmseg.ops': ['*/*.so']},
+        classifiers=[
+            'Development Status :: 4 - Beta',
+            'License :: OSI Approved :: Apache Software License',
+            'Operating System :: OS Independent',
+            'Programming Language :: Python :: 3.6',
+            'Programming Language :: Python :: 3.7',
+            'Programming Language :: Python :: 3.8',
+        ],
+        license='Apache License 2.0',
+        # Every requirement group is read from its own file in requirements/.
+        setup_requires=parse_requirements('requirements/build.txt'),
+        tests_require=parse_requirements('requirements/tests.txt'),
+        install_requires=parse_requirements('requirements/runtime.txt'),
+        extras_require={
+            # requirements.txt pulls in all four files through '-r' includes
+            'all': parse_requirements('requirements.txt'),
+            'tests': parse_requirements('requirements/tests.txt'),
+            'build': parse_requirements('requirements/build.txt'),
+            'optional': parse_requirements('requirements/optional.txt'),
+        },
+        # No extension modules are registered here; BuildExtension is still
+        # installed as the build_ext command.
+        ext_modules=[],
+        cmdclass={'build_ext': BuildExtension},
+        zip_safe=False)
diff --git a/tests/data/color.jpg b/tests/data/color.jpg
new file mode 100644
index 0000000000..05d62b850a
Binary files /dev/null and b/tests/data/color.jpg differ
diff --git a/tests/data/gray.jpg b/tests/data/gray.jpg
new file mode 100644
index 0000000000..94edd7326f
Binary files /dev/null and b/tests/data/gray.jpg differ
diff --git a/tests/data/pseudo_dataset/gts/00000_gt.png b/tests/data/pseudo_dataset/gts/00000_gt.png
new file mode 100644
index 0000000000..48fc125a05
Binary files /dev/null and b/tests/data/pseudo_dataset/gts/00000_gt.png differ
diff --git a/tests/data/pseudo_dataset/gts/00001_gt.png b/tests/data/pseudo_dataset/gts/00001_gt.png
new file mode 100644
index 0000000000..ccb49b05f2
Binary files /dev/null and b/tests/data/pseudo_dataset/gts/00001_gt.png differ
diff --git a/tests/data/pseudo_dataset/gts/00002_gt.png b/tests/data/pseudo_dataset/gts/00002_gt.png
new file mode 100644
index 0000000000..db7250cf0d
Binary files /dev/null and b/tests/data/pseudo_dataset/gts/00002_gt.png differ
diff --git a/tests/data/pseudo_dataset/gts/00003_gt.png b/tests/data/pseudo_dataset/gts/00003_gt.png
new file mode 100644
index 0000000000..f96a1be1b2
Binary files /dev/null and b/tests/data/pseudo_dataset/gts/00003_gt.png differ
diff --git a/tests/data/pseudo_dataset/gts/00004_gt.png b/tests/data/pseudo_dataset/gts/00004_gt.png
new file mode 100644
index 0000000000..35b1cadfcd
Binary files /dev/null and b/tests/data/pseudo_dataset/gts/00004_gt.png differ
diff --git a/tests/data/pseudo_dataset/imgs/00000_img.jpg b/tests/data/pseudo_dataset/imgs/00000_img.jpg
new file mode 100644
index 0000000000..33ab8e29f1
Binary files /dev/null and b/tests/data/pseudo_dataset/imgs/00000_img.jpg differ
diff --git a/tests/data/pseudo_dataset/imgs/00001_img.jpg b/tests/data/pseudo_dataset/imgs/00001_img.jpg
new file mode 100644
index 0000000000..49c222901c
Binary files /dev/null and b/tests/data/pseudo_dataset/imgs/00001_img.jpg differ
diff --git a/tests/data/pseudo_dataset/imgs/00002_img.jpg b/tests/data/pseudo_dataset/imgs/00002_img.jpg
new file mode 100644
index 0000000000..6baeb5fded
Binary files /dev/null and b/tests/data/pseudo_dataset/imgs/00002_img.jpg differ
diff --git a/tests/data/pseudo_dataset/imgs/00003_img.jpg b/tests/data/pseudo_dataset/imgs/00003_img.jpg
new file mode 100644
index 0000000000..6e889d759d
Binary files /dev/null and b/tests/data/pseudo_dataset/imgs/00003_img.jpg differ
diff --git a/tests/data/pseudo_dataset/imgs/00004_img.jpg b/tests/data/pseudo_dataset/imgs/00004_img.jpg
new file mode 100644
index 0000000000..474c915d8c
Binary files /dev/null and b/tests/data/pseudo_dataset/imgs/00004_img.jpg differ
diff --git a/tests/data/pseudo_dataset/splits/train.txt b/tests/data/pseudo_dataset/splits/train.txt
new file mode 100644
index 0000000000..9e25ab0266
--- /dev/null
+++ b/tests/data/pseudo_dataset/splits/train.txt
@@ -0,0 +1,4 @@
+00000
+00001
+00002
+00003
diff --git a/tests/data/pseudo_dataset/splits/val.txt b/tests/data/pseudo_dataset/splits/val.txt
new file mode 100644
index 0000000000..59dd536625
--- /dev/null
+++ b/tests/data/pseudo_dataset/splits/val.txt
@@ -0,0 +1 @@
+00004
diff --git a/tests/data/seg.png b/tests/data/seg.png
new file mode 100644
index 0000000000..f23a499cef
Binary files /dev/null and b/tests/data/seg.png differ
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000000..77a0035e55
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,165 @@
+import glob
+import os
+from os.path import dirname, exists, isdir, join, relpath
+
+from mmcv import Config
+from torch import nn
+
+from mmseg.models import build_segmentor
+
+
+def _get_config_directory():
+    """Return the path of the predefined segmentor ``configs`` directory.
+
+    Raises:
+        Exception: If the directory does not exist.
+    """
+    try:
+        # Running from the source repo: this file lives in <repo>/tests
+        anchor = __file__
+    except NameError:
+        # __file__ is not defined in interactive sessions such as IPython
+        import mmseg
+        anchor = mmseg.__file__
+    configs_root = join(dirname(dirname(anchor)), 'configs')
+    if exists(configs_root):
+        return configs_root
+    raise Exception('Cannot find config path')
+
+
+def test_config_build_segmentor():
+    """Test that all segmentation models defined in the configs can be
+    initialized.
+
+    One config file is taken from every sub-folder of the config directory;
+    paths containing ``_base_`` are skipped.
+    """
+    config_dpath = _get_config_directory()
+    print('Found config_dpath = {!r}'.format(config_dpath))
+
+    config_fpaths = []
+    # one config each sub folder
+    for sub_folder in sorted(os.listdir(config_dpath)):
+        sub_dpath = join(config_dpath, sub_folder)
+        # listdir() yields bare names: test the joined path, otherwise the
+        # check is resolved against the current working directory.
+        if not isdir(sub_dpath):
+            continue
+        sub_fpaths = sorted(glob.glob(join(sub_dpath, '*.py')))
+        # folders without top-level config files have nothing to sample
+        if sub_fpaths:
+            config_fpaths.append(sub_fpaths[0])
+    config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
+    config_names = [relpath(p, config_dpath) for p in config_fpaths]
+
+    print('Using {} config files'.format(len(config_names)))
+
+    for config_fname in config_names:
+        config_fpath = join(config_dpath, config_fname)
+        config_mod = Config.fromfile(config_fpath)
+
+        # Bare attribute accesses: they only verify that the keys exist.
+        config_mod.model
+        config_mod.train_cfg
+        config_mod.test_cfg
+        print('Building segmentor, config_fpath = {!r}'.format(config_fpath))
+
+        # Remove pretrained keys to allow for testing in an offline environment
+        if 'pretrained' in config_mod.model:
+            config_mod.model['pretrained'] = None
+
+        print('building {}'.format(config_fname))
+        segmentor = build_segmentor(
+            config_mod.model,
+            train_cfg=config_mod.train_cfg,
+            test_cfg=config_mod.test_cfg)
+        assert segmentor is not None
+
+        head_config = config_mod.model['decode_head']
+        _check_decode_head(head_config, segmentor.decode_head)
+
+
+def test_config_data_pipeline():
+    """Test whether the data pipeline is valid and can process corner cases.
+
+    For every config (paths containing ``_base_`` excluded) the train and
+    test pipelines are composed without their loading steps and applied to
+    a randomly generated image and segmentation map.
+
+    CommandLine:
+        xdoctest -m tests/test_config.py test_config_data_pipeline
+    """
+    from mmcv import Config
+    from mmseg.datasets.pipelines import Compose
+    import numpy as np
+
+    config_dpath = _get_config_directory()
+    print('Found config_dpath = {!r}'.format(config_dpath))
+
+    import glob
+    # glob() is called without recursive=True, so '**' matches exactly one
+    # directory level here.
+    config_fpaths = list(glob.glob(join(config_dpath, '**', '*.py')))
+    config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
+    config_names = [relpath(p, config_dpath) for p in config_fpaths]
+
+    print('Using {} config files'.format(len(config_names)))
+
+    for config_fname in config_names:
+        config_fpath = join(config_dpath, config_fname)
+        print(
+            'Building data pipeline, config_fpath = {!r}'.format(config_fpath))
+        config_mod = Config.fromfile(config_fpath)
+
+        # remove loading pipeline
+        # The first train step is kept to read its 'to_float32' option; the
+        # second pop drops what is presumably the annotation-loading step
+        # (TODO confirm against the configs).
+        load_img_pipeline = config_mod.train_pipeline.pop(0)
+        to_float32 = load_img_pipeline.get('to_float32', False)
+        config_mod.train_pipeline.pop(0)
+        config_mod.test_pipeline.pop(0)
+
+        train_pipeline = Compose(config_mod.train_pipeline)
+        test_pipeline = Compose(config_mod.test_pipeline)
+
+        # Synthetic inputs stand in for the removed loading steps.
+        img = np.random.randint(0, 255, size=(1024, 2048, 3), dtype=np.uint8)
+        if to_float32:
+            img = img.astype(np.float32)
+        seg = np.random.randint(0, 255, size=(1024, 2048, 1), dtype=np.uint8)
+
+        results = dict(
+            filename='test_img.png',
+            ori_filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+            gt_semantic_seg=seg)
+        results['seg_fields'] = ['gt_semantic_seg']
+
+        print('Test training data pipeline: \n{!r}'.format(train_pipeline))
+        output_results = train_pipeline(results)
+        assert output_results is not None
+
+        # The test pipeline gets a fresh dict without any segmentation map.
+        results = dict(
+            filename='test_img.png',
+            ori_filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+        )
+        print('Test testing data pipeline: \n{!r}'.format(test_pipeline))
+        output_results = test_pipeline(results)
+        assert output_results is not None
+
+
+def _check_decode_head(decode_head_cfg, decode_head):
+    """Assert that a built decode head is consistent with its config.
+
+    Args:
+        decode_head_cfg: Config of a single head, or a list of such configs.
+            A single config is read both by key (``['type']``) and by
+            attribute (``.in_channels``, ``.channels``, ``.num_classes``).
+        decode_head: The built head, or an ``nn.ModuleList`` of heads when
+            ``decode_head_cfg`` is a list.
+
+    Raises:
+        AssertionError: If any checked property of the head disagrees with
+            the config.
+    """
+    if isinstance(decode_head_cfg, list):
+        assert isinstance(decode_head, nn.ModuleList)
+        assert len(decode_head_cfg) == len(decode_head)
+        for sub_cfg, sub_head in zip(decode_head_cfg, decode_head):
+            _check_decode_head(sub_cfg, sub_head)
+        return
+    # check consistency between head_config and decode_head
+    assert decode_head_cfg['type'] == decode_head.__class__.__name__
+
+    in_channels = decode_head_cfg.in_channels
+    input_transform = decode_head.input_transform
+    assert input_transform in ['resize_concat', 'multiple_select', None]
+    if input_transform is not None:
+        assert isinstance(in_channels, (list, tuple))
+        assert isinstance(decode_head.in_index, (list, tuple))
+        assert len(in_channels) == len(decode_head.in_index)
+        # This check must be nested: as a sibling ``elif`` of the
+        # ``is not None`` test it could never be reached.
+        if input_transform == 'resize_concat':
+            assert sum(in_channels) == decode_head.in_channels
+    else:
+        assert isinstance(in_channels, int)
+        assert in_channels == decode_head.in_channels
+        assert isinstance(decode_head.in_index, int)
+
+    if decode_head_cfg['type'] == 'PointHead':
+        assert decode_head_cfg.channels+decode_head_cfg.num_classes == \
+            decode_head.fc_seg.in_channels
+        assert decode_head.fc_seg.out_channels == decode_head_cfg.num_classes
+    else:
+        assert decode_head_cfg.channels == decode_head.conv_seg.in_channels
+        assert decode_head.conv_seg.out_channels == decode_head_cfg.num_classes
diff --git a/tests/test_data/test_dataset.py b/tests/test_data/test_dataset.py
new file mode 100644
index 0000000000..ee6d2c47a8
--- /dev/null
+++ b/tests/test_data/test_dataset.py
@@ -0,0 +1,173 @@
+import os.path as osp
+from unittest.mock import MagicMock, patch
+
+import numpy as np
+import pytest
+
+from mmseg.core.evaluation import get_classes, get_palette
+from mmseg.datasets import (ADE20KDataset, CityscapesDataset, ConcatDataset,
+ CustomDataset, PascalVOCDataset, RepeatDataset)
+
+
+def test_classes():
+    """Class names of each dataset must match ``get_classes`` for every
+    alias of that dataset, and unknown names must be rejected."""
+    aliases_per_dataset = [
+        (CityscapesDataset, ['cityscapes']),
+        (PascalVOCDataset, ['voc', 'pascal_voc']),
+        (ADE20KDataset, ['ade', 'ade20k']),
+    ]
+    for dataset_cls, aliases in aliases_per_dataset:
+        expected = list(dataset_cls.CLASSES)
+        for alias in aliases:
+            assert expected == get_classes(alias)
+
+    with pytest.raises(ValueError):
+        get_classes('unsupported')
+
+
+def test_palette():
+    """Palette of each dataset must match ``get_palette`` for every alias
+    of that dataset, and unknown names must be rejected."""
+    aliases_per_dataset = [
+        (CityscapesDataset, ['cityscapes']),
+        (PascalVOCDataset, ['voc', 'pascal_voc']),
+        (ADE20KDataset, ['ade', 'ade20k']),
+    ]
+    for dataset_cls, aliases in aliases_per_dataset:
+        for alias in aliases:
+            assert dataset_cls.PALETTE == get_palette(alias)
+
+    with pytest.raises(ValueError):
+        get_palette('unsupported')
+
+
+@patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock)
+@patch('mmseg.datasets.CustomDataset.__getitem__',
+       MagicMock(side_effect=lambda idx: idx))
+def test_dataset_wrapper():
+    """Check index mapping and length of ConcatDataset and RepeatDataset.
+
+    ``load_annotations`` and ``__getitem__`` of ``CustomDataset`` are
+    patched for the duration of the test; the patched ``__getitem__``
+    returns the index it was called with, so every assertion below reads
+    the index that the wrapper forwarded to the wrapped dataset.
+    """
+    dataset_a = CustomDataset(img_dir=MagicMock(), pipeline=[])
+    len_a = 10
+    # Replace img_infos with a mock whose len() reports the wanted size.
+    dataset_a.img_infos = MagicMock()
+    dataset_a.img_infos.__len__.return_value = len_a
+    dataset_b = CustomDataset(img_dir=MagicMock(), pipeline=[])
+    len_b = 20
+    dataset_b.img_infos = MagicMock()
+    dataset_b.img_infos.__len__.return_value = len_b
+
+    concat_dataset = ConcatDataset([dataset_a, dataset_b])
+    assert concat_dataset[5] == 5
+    # index 25 falls into dataset_b: 25 - len_a == 15
+    assert concat_dataset[25] == 15
+    assert len(concat_dataset) == len(dataset_a) + len(dataset_b)
+
+    repeat_dataset = RepeatDataset(dataset_a, 10)
+    assert repeat_dataset[5] == 5
+    # indices wrap around modulo len_a
+    assert repeat_dataset[15] == 5
+    assert repeat_dataset[27] == 7
+    assert len(repeat_dataset) == 10 * len(dataset_a)
+
+
+def test_custom_dataset():
+    """Exercise CustomDataset on the pseudo dataset under tests/data.
+
+    Covers the supported ways of passing ``data_root``/``img_dir``/
+    ``ann_dir``/``split``, test mode, item access, ground-truth loading and
+    evaluation. The pseudo dataset holds 5 image/annotation pairs;
+    ``splits/train.txt`` lists 4 of them.
+    """
+    img_norm_cfg = dict(
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        to_rgb=True)
+    crop_size = (512, 1024)
+    train_pipeline = [
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadAnnotations'),
+        dict(type='Resize', img_scale=(128, 256), ratio_range=(0.5, 2.0)),
+        dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(type='PhotoMetricDistortion'),
+        dict(type='Normalize', **img_norm_cfg),
+        dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+        dict(type='DefaultFormatBundle'),
+        dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    ]
+    test_pipeline = [
+        dict(type='LoadImageFromFile'),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(128, 256),
+            # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=True),
+                dict(type='RandomFlip'),
+                dict(type='Normalize', **img_norm_cfg),
+                dict(type='ImageToTensor', keys=['img']),
+                dict(type='Collect', keys=['img']),
+            ])
+    ]
+
+    # with img_dir and ann_dir
+    train_dataset = CustomDataset(
+        train_pipeline,
+        data_root=osp.join(osp.dirname(__file__), '../data/pseudo_dataset'),
+        img_dir='imgs/',
+        ann_dir='gts/',
+        img_suffix='img.jpg',
+        seg_map_suffix='gt.png')
+    assert len(train_dataset) == 5
+
+    # with img_dir, ann_dir, split
+    train_dataset = CustomDataset(
+        train_pipeline,
+        data_root=osp.join(osp.dirname(__file__), '../data/pseudo_dataset'),
+        img_dir='imgs/',
+        ann_dir='gts/',
+        img_suffix='img.jpg',
+        seg_map_suffix='gt.png',
+        split='splits/train.txt')
+    assert len(train_dataset) == 4
+
+    # no data_root
+    train_dataset = CustomDataset(
+        train_pipeline,
+        img_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs'),
+        ann_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/gts'),
+        img_suffix='img.jpg',
+        seg_map_suffix='gt.png')
+    assert len(train_dataset) == 5
+
+    # with data_root but img_dir/ann_dir are abs path
+    train_dataset = CustomDataset(
+        train_pipeline,
+        data_root=osp.join(osp.dirname(__file__), '../data/pseudo_dataset'),
+        img_dir=osp.abspath(
+            osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs')),
+        ann_dir=osp.abspath(
+            osp.join(osp.dirname(__file__), '../data/pseudo_dataset/gts')),
+        img_suffix='img.jpg',
+        seg_map_suffix='gt.png')
+    assert len(train_dataset) == 5
+
+    # test_mode=True
+    test_dataset = CustomDataset(
+        test_pipeline,
+        img_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs'),
+        img_suffix='img.jpg',
+        test_mode=True)
+    assert len(test_dataset) == 5
+
+    # training data get
+    # (train_dataset is the last one built above: abs paths, all 5 samples)
+    train_data = train_dataset[0]
+    assert isinstance(train_data, dict)
+
+    # test data get
+    test_data = test_dataset[0]
+    assert isinstance(test_data, dict)
+
+    # get gt seg map
+    gt_seg_maps = train_dataset.get_gt_seg_maps()
+    assert len(gt_seg_maps) == 5
+
+    # evaluation
+    # random predictions with labels in [0, 7), shaped like each gt map
+    pseudo_results = []
+    for gt_seg_map in gt_seg_maps:
+        h, w = gt_seg_map.shape
+        pseudo_results.append(np.random.randint(low=0, high=7, size=(h, w)))
+    eval_results = train_dataset.evaluate(pseudo_results)
+    assert isinstance(eval_results, dict)
+    assert 'mIoU' in eval_results
+    assert 'mAcc' in eval_results
+    assert 'aAcc' in eval_results
+
+    # evaluation with CLASSES
+    train_dataset.CLASSES = tuple(['a'] * 7)
+    eval_results = train_dataset.evaluate(pseudo_results)
+    assert isinstance(eval_results, dict)
+    assert 'mIoU' in eval_results
+    assert 'mAcc' in eval_results
+    assert 'aAcc' in eval_results
diff --git a/tests/test_data/test_dataset_builder.py b/tests/test_data/test_dataset_builder.py
new file mode 100644
index 0000000000..c6827e4d17
--- /dev/null
+++ b/tests/test_data/test_dataset_builder.py
@@ -0,0 +1,192 @@
+import math
+import os.path as osp
+
+import pytest
+from torch.utils.data import (DistributedSampler, RandomSampler,
+ SequentialSampler)
+
+from mmseg.datasets import (DATASETS, ConcatDataset, build_dataloader,
+ build_dataset)
+
+
+@DATASETS.register_module()
+class ToyDataset(object):
+    """Minimal dataset used to exercise the dataset/dataloader builders.
+
+    It reports a fixed length of 100 and returns the requested index as
+    the sample.
+
+    Args:
+        cnt (int): Arbitrary value stored on the instance; the builder test
+            uses it to check that ``default_args`` are forwarded.
+    """
+
+    def __init__(self, cnt=0):
+        self.cnt = cnt
+
+    def __getitem__(self, idx):
+        return idx
+
+    # Alias for the original, misspelled method name, kept so that existing
+    # callers of ``__item__`` keep working.
+    __item__ = __getitem__
+
+    def __len__(self):
+        return 100
+
+
+def test_build_dataset():
+    """Check build_dataset for plain configs and for list-valued
+    ``img_dir``/``ann_dir``/``split``, which yield a ConcatDataset.
+
+    The pseudo dataset holds 5 samples; ``splits/train.txt`` lists 4 of
+    them and ``splits/val.txt`` lists 1.
+    """
+    cfg = dict(type='ToyDataset')
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ToyDataset)
+    assert dataset.cnt == 0
+    # default_args are forwarded to the dataset constructor
+    dataset = build_dataset(cfg, default_args=dict(cnt=1))
+    assert isinstance(dataset, ToyDataset)
+    assert dataset.cnt == 1
+
+    data_root = osp.join(osp.dirname(__file__), '../data/pseudo_dataset')
+    img_dir = 'imgs/'
+    ann_dir = 'gts/'
+
+    # We use same dir twice for simplicity
+    # list of img_dir with matching list of ann_dir: 5 + 5 samples
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=[img_dir, img_dir],
+        ann_dir=[ann_dir, ann_dir])
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 10
+
+    # single img_dir and ann_dir, list of split: 4 (train) + 1 (val) samples
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=img_dir,
+        ann_dir=ann_dir,
+        split=['splits/train.txt', 'splits/val.txt'])
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 5
+
+    # single img_dir, list of ann_dir and list of split of equal length
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=img_dir,
+        ann_dir=[ann_dir, ann_dir],
+        split=['splits/train.txt', 'splits/val.txt'])
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 5
+
+    # test mode
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=[img_dir, img_dir],
+        test_mode=True)
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 10
+
+    # test mode with splits: the val split (1 sample) is used twice
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=[img_dir, img_dir],
+        split=['splits/val.txt', 'splits/val.txt'],
+        test_mode=True)
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 2
+
+    # len(ann_dir) should be zero or len(img_dir) when len(img_dir) > 1
+    with pytest.raises(AssertionError):
+        cfg = dict(
+            type='CustomDataset',
+            pipeline=[],
+            data_root=data_root,
+            img_dir=[img_dir, img_dir],
+            ann_dir=[ann_dir, ann_dir, ann_dir])
+        build_dataset(cfg)
+
+    # len(splits) should be zero or len(img_dir) when len(img_dir) > 1
+    with pytest.raises(AssertionError):
+        cfg = dict(
+            type='CustomDataset',
+            pipeline=[],
+            data_root=data_root,
+            img_dir=[img_dir, img_dir],
+            split=['splits/val.txt', 'splits/val.txt', 'splits/val.txt'])
+        build_dataset(cfg)
+
+    # len(splits) == len(ann_dir) when only len(img_dir) == 1 and len(
+    # ann_dir) > 1
+    with pytest.raises(AssertionError):
+        cfg = dict(
+            type='CustomDataset',
+            pipeline=[],
+            data_root=data_root,
+            img_dir=img_dir,
+            ann_dir=[ann_dir, ann_dir],
+            split=['splits/val.txt', 'splits/val.txt', 'splits/val.txt'])
+        build_dataset(cfg)
+
+
+def test_build_dataloader():
+    """Check batch size, length, sampler type and worker count of the
+    loaders returned by build_dataloader for several combinations of
+    ``dist``, ``shuffle`` and ``num_gpus``."""
+    dataset = ToyDataset()
+    samples_per_gpu = 3
+    # dist=True, shuffle=True, 1GPU
+    dataloader = build_dataloader(
+        dataset, samples_per_gpu=samples_per_gpu, workers_per_gpu=2)
+    assert dataloader.batch_size == samples_per_gpu
+    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
+    assert isinstance(dataloader.sampler, DistributedSampler)
+    assert dataloader.sampler.shuffle
+
+    # dist=True, shuffle=False, 1GPU
+    dataloader = build_dataloader(
+        dataset,
+        samples_per_gpu=samples_per_gpu,
+        workers_per_gpu=2,
+        shuffle=False)
+    assert dataloader.batch_size == samples_per_gpu
+    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
+    assert isinstance(dataloader.sampler, DistributedSampler)
+    assert not dataloader.sampler.shuffle
+
+    # dist=True, shuffle=True, 8GPU
+    # batch size and worker count are expected not to scale with num_gpus
+    dataloader = build_dataloader(
+        dataset,
+        samples_per_gpu=samples_per_gpu,
+        workers_per_gpu=2,
+        num_gpus=8)
+    assert dataloader.batch_size == samples_per_gpu
+    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
+    assert dataloader.num_workers == 2
+
+    # dist=False, shuffle=True, 1GPU
+    dataloader = build_dataloader(
+        dataset,
+        samples_per_gpu=samples_per_gpu,
+        workers_per_gpu=2,
+        dist=False)
+    assert dataloader.batch_size == samples_per_gpu
+    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
+    assert isinstance(dataloader.sampler, RandomSampler)
+    assert dataloader.num_workers == 2
+
+    # dist=False, shuffle=False, 1GPU
+    dataloader = build_dataloader(
+        dataset,
+        samples_per_gpu=3,
+        workers_per_gpu=2,
+        shuffle=False,
+        dist=False)
+    assert dataloader.batch_size == samples_per_gpu
+    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
+    assert isinstance(dataloader.sampler, SequentialSampler)
+    assert dataloader.num_workers == 2
+
+    # dist=False, shuffle=True, 8GPU
+    # here batch size and worker count are expected to be multiplied by 8
+    dataloader = build_dataloader(
+        dataset, samples_per_gpu=3, workers_per_gpu=2, num_gpus=8, dist=False)
+    assert dataloader.batch_size == samples_per_gpu * 8
+    assert len(dataloader) == int(
+        math.ceil(len(dataset) / samples_per_gpu / 8))
+    assert isinstance(dataloader.sampler, RandomSampler)
+    assert dataloader.num_workers == 16
diff --git a/tests/test_data/test_loading.py b/tests/test_data/test_loading.py
new file mode 100644
index 0000000000..653b3daf4e
--- /dev/null
+++ b/tests/test_data/test_loading.py
@@ -0,0 +1,100 @@
+import copy
+import os.path as osp
+
+import numpy as np
+
+from mmseg.datasets.pipelines import LoadAnnotations, LoadImageFromFile
+
+
+class TestLoading(object):
+    """Tests for the LoadImageFromFile and LoadAnnotations transforms,
+    run against the sample files in ``tests/data``."""
+
+    @classmethod
+    def setup_class(cls):
+        # directory that holds color.jpg, gray.jpg and seg.png
+        cls.data_prefix = osp.join(osp.dirname(__file__), '../data')
+
+    def test_load_img(self):
+        """Load colour and gray images with different transform options."""
+        results = dict(
+            img_prefix=self.data_prefix, img_info=dict(filename='color.jpg'))
+        transform = LoadImageFromFile()
+        results = transform(copy.deepcopy(results))
+        assert results['filename'] == osp.join(self.data_prefix, 'color.jpg')
+        assert results['ori_filename'] == 'color.jpg'
+        assert results['img'].shape == (288, 512, 3)
+        assert results['img'].dtype == np.uint8
+        assert results['img_shape'] == (288, 512, 3)
+        assert results['ori_shape'] == (288, 512, 3)
+        assert results['pad_shape'] == (288, 512, 3)
+        assert results['scale_factor'] == 1.0
+        np.testing.assert_equal(results['img_norm_cfg']['mean'],
+                                np.zeros(3, dtype=np.float32))
+        assert repr(transform) == transform.__class__.__name__ + \
+            "(to_float32=False,color_type='color',imdecode_backend='cv2')"
+
+        # no img_prefix
+        # NOTE: this filename is relative to the current working directory,
+        # so the test has to be started from the repository root.
+        results = dict(
+            img_prefix=None, img_info=dict(filename='tests/data/color.jpg'))
+        transform = LoadImageFromFile()
+        results = transform(copy.deepcopy(results))
+        assert results['filename'] == 'tests/data/color.jpg'
+        assert results['ori_filename'] == 'tests/data/color.jpg'
+        assert results['img'].shape == (288, 512, 3)
+
+        # to_float32
+        # (the output of the previous transform is fed in again)
+        transform = LoadImageFromFile(to_float32=True)
+        results = transform(copy.deepcopy(results))
+        assert results['img'].dtype == np.float32
+
+        # gray image
+        # with the default color_type the result still has 3 channels
+        results = dict(
+            img_prefix=self.data_prefix, img_info=dict(filename='gray.jpg'))
+        transform = LoadImageFromFile()
+        results = transform(copy.deepcopy(results))
+        assert results['img'].shape == (288, 512, 3)
+        assert results['img'].dtype == np.uint8
+
+        # color_type='unchanged' keeps the gray image two-dimensional
+        transform = LoadImageFromFile(color_type='unchanged')
+        results = transform(copy.deepcopy(results))
+        assert results['img'].shape == (288, 512)
+        assert results['img'].dtype == np.uint8
+        np.testing.assert_equal(results['img_norm_cfg']['mean'],
+                                np.zeros(1, dtype=np.float32))
+
+    def test_load_seg(self):
+        """Load the sample segmentation map with different options."""
+        results = dict(
+            seg_prefix=self.data_prefix,
+            ann_info=dict(seg_map='seg.png'),
+            seg_fields=[])
+        transform = LoadAnnotations()
+        results = transform(copy.deepcopy(results))
+        assert results['seg_fields'] == ['gt_semantic_seg']
+        assert results['gt_semantic_seg'].shape == (288, 512)
+        assert results['gt_semantic_seg'].dtype == np.uint8
+        assert repr(transform) == transform.__class__.__name__ + \
+            "(reduce_zero_label=False,imdecode_backend='pillow')"
+
+        # no seg_prefix
+        # NOTE: this path is relative to the current working directory, so
+        # the test has to be started from the repository root.
+        results = dict(
+            seg_prefix=None,
+            ann_info=dict(seg_map='tests/data/seg.png'),
+            seg_fields=[])
+        transform = LoadAnnotations()
+        results = transform(copy.deepcopy(results))
+        assert results['gt_semantic_seg'].shape == (288, 512)
+        assert results['gt_semantic_seg'].dtype == np.uint8
+
+        # reduce_zero_label
+        transform = LoadAnnotations(reduce_zero_label=True)
+        results = transform(copy.deepcopy(results))
+        assert results['gt_semantic_seg'].shape == (288, 512)
+        assert results['gt_semantic_seg'].dtype == np.uint8
+
+        # explicit imdecode_backend
+        # NOTE(review): 'pillow' is already the default according to the
+        # repr asserted above, so this section repeats the default case -
+        # confirm whether another backend was meant to be tested here.
+        results = dict(
+            seg_prefix=self.data_prefix,
+            ann_info=dict(seg_map='seg.png'),
+            seg_fields=[])
+        transform = LoadAnnotations(imdecode_backend='pillow')
+        results = transform(copy.deepcopy(results))
+        # this image is saved by PIL
+        assert results['gt_semantic_seg'].shape == (288, 512)
+        assert results['gt_semantic_seg'].dtype == np.uint8
diff --git a/tests/test_data/test_transform.py b/tests/test_data/test_transform.py
new file mode 100644
index 0000000000..7a1ca0dde3
--- /dev/null
+++ b/tests/test_data/test_transform.py
@@ -0,0 +1,242 @@
+import copy
+import os.path as osp
+
+import mmcv
+import numpy as np
+import pytest
+from mmcv.utils import build_from_cfg
+from PIL import Image
+
+from mmseg.datasets.builder import PIPELINES
+
+
+def test_resize():
+ """Check ``Resize`` config validation and the resulting ``img_shape``."""
+ # test assertion if img_scale is a list
+ with pytest.raises(AssertionError):
+ transform = dict(type='Resize', img_scale=[1333, 800], keep_ratio=True)
+ build_from_cfg(transform, PIPELINES)
+
+ # test assertion if several img_scale values are given while ratio_range
+ # is not None
+ with pytest.raises(AssertionError):
+ transform = dict(
+ type='Resize',
+ img_scale=[(1333, 800), (1333, 600)],
+ ratio_range=(0.9, 1.1),
+ keep_ratio=True)
+ build_from_cfg(transform, PIPELINES)
+
+ # test assertion for invalid multiscale_mode
+ with pytest.raises(AssertionError):
+ transform = dict(
+ type='Resize',
+ img_scale=[(1333, 800), (1333, 600)],
+ keep_ratio=True,
+ multiscale_mode='2333')
+ build_from_cfg(transform, PIPELINES)
+
+ # test a single scale with keep_ratio=True
+ transform = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True)
+ resize_module = build_from_cfg(transform, PIPELINES)
+
+ results = dict()
+ img = mmcv.imread(
+ osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color')
+ results['img'] = img
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ # Set initial values for default meta_keys
+ results['pad_shape'] = img.shape
+ results['scale_factor'] = 1.0
+
+ # every case below starts from a shallow copy of the same input dict
+ resized_results = resize_module(results.copy())
+ assert resized_results['img_shape'] == (750, 1333, 3)
+
+ # test keep_ratio=False
+ transform = dict(
+ type='Resize',
+ img_scale=(1280, 800),
+ multiscale_mode='value',
+ keep_ratio=False)
+ resize_module = build_from_cfg(transform, PIPELINES)
+ resized_results = resize_module(results.copy())
+ assert resized_results['img_shape'] == (800, 1280, 3)
+
+ # test multiscale_mode='range'
+ transform = dict(
+ type='Resize',
+ img_scale=[(1333, 400), (1333, 1200)],
+ multiscale_mode='range',
+ keep_ratio=True)
+ resize_module = build_from_cfg(transform, PIPELINES)
+ resized_results = resize_module(results.copy())
+ assert max(resized_results['img_shape'][:2]) <= 1333
+ assert min(resized_results['img_shape'][:2]) >= 400
+ assert min(resized_results['img_shape'][:2]) <= 1200
+
+ # test multiscale_mode='value'
+ transform = dict(
+ type='Resize',
+ img_scale=[(1333, 800), (1333, 400)],
+ multiscale_mode='value',
+ keep_ratio=True)
+ resize_module = build_from_cfg(transform, PIPELINES)
+ resized_results = resize_module(results.copy())
+ assert resized_results['img_shape'] in [(750, 1333, 3), (400, 711, 3)]
+
+ # test ratio_range combined with a single img_scale
+ transform = dict(
+ type='Resize',
+ img_scale=(1333, 800),
+ ratio_range=(0.9, 1.1),
+ keep_ratio=True)
+ resize_module = build_from_cfg(transform, PIPELINES)
+ resized_results = resize_module(results.copy())
+ assert max(resized_results['img_shape'][:2]) <= 1333 * 1.1
+
+
+def test_flip():
+ """Check ``RandomFlip`` validation and that flipping twice is a no-op."""
+ # test assertion for invalid flip_ratio
+ with pytest.raises(AssertionError):
+ transform = dict(type='RandomFlip', flip_ratio=1.5)
+ build_from_cfg(transform, PIPELINES)
+
+ # test assertion for invalid direction
+ # ('horizonta' is deliberately not a valid direction name)
+ with pytest.raises(AssertionError):
+ transform = dict(
+ type='RandomFlip', flip_ratio=1, direction='horizonta')
+ build_from_cfg(transform, PIPELINES)
+
+ transform = dict(type='RandomFlip', flip_ratio=1)
+ flip_module = build_from_cfg(transform, PIPELINES)
+
+ results = dict()
+ img = mmcv.imread(
+ osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color')
+ original_img = copy.deepcopy(img)
+ seg = np.array(
+ Image.open(osp.join(osp.dirname(__file__), '../data/seg.png')))
+ original_seg = copy.deepcopy(seg)
+ results['img'] = img
+ results['gt_semantic_seg'] = seg
+ results['seg_fields'] = ['gt_semantic_seg']
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ # Set initial values for default meta_keys
+ results['pad_shape'] = img.shape
+ results['scale_factor'] = 1.0
+
+ # first pass through the transform
+ results = flip_module(results)
+
+ # second pass with a freshly built module; the image and the segmentation
+ # map are then expected to equal the untouched copies taken above
+ flip_module = build_from_cfg(transform, PIPELINES)
+ results = flip_module(results)
+ assert np.equal(original_img, results['img']).all()
+ assert np.equal(original_seg, results['gt_semantic_seg']).all()
+
+
+def test_random_crop():
+ """Check ``RandomCrop`` validation and the cropped image/label shapes."""
+ # test assertion for invalid random crop
+ with pytest.raises(AssertionError):
+ transform = dict(type='RandomCrop', crop_size=(-1, 0))
+ build_from_cfg(transform, PIPELINES)
+
+ results = dict()
+ img = mmcv.imread(
+ osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color')
+ seg = np.array(
+ Image.open(osp.join(osp.dirname(__file__), '../data/seg.png')))
+ results['img'] = img
+ results['gt_semantic_seg'] = seg
+ results['seg_fields'] = ['gt_semantic_seg']
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ # Set initial values for default meta_keys
+ results['pad_shape'] = img.shape
+ results['scale_factor'] = 1.0
+
+ # crop 20 pixels off each dimension; the image, its recorded shape and the
+ # segmentation map must all end up with the requested size
+ h, w, _ = img.shape
+ transform = dict(type='RandomCrop', crop_size=(h - 20, w - 20))
+ crop_module = build_from_cfg(transform, PIPELINES)
+ results = crop_module(results)
+ assert results['img'].shape[:2] == (h - 20, w - 20)
+ assert results['img_shape'][:2] == (h - 20, w - 20)
+ assert results['gt_semantic_seg'].shape[:2] == (h - 20, w - 20)
+
+
+def test_pad():
+ """Check ``Pad`` validation and padding to a multiple of ``size_divisor``."""
+ # test assertion if both size_divisor and size is None
+ with pytest.raises(AssertionError):
+ transform = dict(type='Pad')
+ build_from_cfg(transform, PIPELINES)
+
+ transform = dict(type='Pad', size_divisor=32)
+ transform = build_from_cfg(transform, PIPELINES)
+ results = dict()
+ img = mmcv.imread(
+ osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color')
+ original_img = copy.deepcopy(img)
+ results['img'] = img
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ # Set initial values for default meta_keys
+ results['pad_shape'] = img.shape
+ results['scale_factor'] = 1.0
+
+ results = transform(results)
+ # original img already divisible by 32
+ assert np.equal(results['img'], original_img).all()
+ img_shape = results['img'].shape
+ assert img_shape[0] % 32 == 0
+ assert img_shape[1] % 32 == 0
+
+ # resize first, then pad again: the padded result must still be a
+ # multiple of 32 in both spatial dimensions
+ resize_transform = dict(
+ type='Resize', img_scale=(1333, 800), keep_ratio=True)
+ resize_module = build_from_cfg(resize_transform, PIPELINES)
+ results = resize_module(results)
+ results = transform(results)
+ img_shape = results['img'].shape
+ assert img_shape[0] % 32 == 0
+ assert img_shape[1] % 32 == 0
+
+
+def test_normalize():
+ """Check ``Normalize`` against a manual ``(img - mean) / std`` computation."""
+ img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True)
+ transform = dict(type='Normalize', **img_norm_cfg)
+ transform = build_from_cfg(transform, PIPELINES)
+ results = dict()
+ img = mmcv.imread(
+ osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color')
+ original_img = copy.deepcopy(img)
+ results['img'] = img
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ # Set initial values for default meta_keys
+ results['pad_shape'] = img.shape
+ results['scale_factor'] = 1.0
+
+ results = transform(results)
+
+ mean = np.array(img_norm_cfg['mean'])
+ std = np.array(img_norm_cfg['std'])
+ # reverse the channel axis of the reference image to mirror to_rgb=True
+ converted_img = (original_img[..., ::-1] - mean) / std
+ assert np.allclose(results['img'], converted_img)
+
+
+def test_seg_rescale():
+ results = dict()
+ seg = np.array(
+ Image.open(osp.join(osp.dirname(__file__), '../data/seg.png')))
+ results['gt_semantic_seg'] = seg
+ results['seg_fields'] = ['gt_semantic_seg']
+ h, w = seg.shape
+
+ transform = dict(type='SegRescale', scale_factor=1. / 2)
+ rescale_module = build_from_cfg(transform, PIPELINES)
+ rescale_results = rescale_module(results.copy())
+ assert rescale_results['gt_semantic_seg'].shape == (h // 2, w // 2)
+
+ transform = dict(type='SegRescale', scale_factor=1)
+ rescale_module = build_from_cfg(transform, PIPELINES)
+ rescale_results = rescale_module(results.copy())
+ assert rescale_results['gt_semantic_seg'].shape == (h, w)
diff --git a/tests/test_eval_hook.py b/tests/test_eval_hook.py
new file mode 100644
index 0000000000..84542ecfe3
--- /dev/null
+++ b/tests/test_eval_hook.py
@@ -0,0 +1,118 @@
+import logging
+import tempfile
+from unittest.mock import MagicMock, patch
+
+import mmcv.runner
+import pytest
+import torch
+import torch.nn as nn
+from mmcv.runner import obj_from_dict
+from torch.utils.data import DataLoader, Dataset
+
+from mmseg.apis import single_gpu_test
+from mmseg.core import DistEvalHook, EvalHook
+
+
+class ExampleDataset(Dataset):
+
+ def __getitem__(self, idx):
+ results = dict(img=torch.tensor([1]), img_metas=dict())
+ return results
+
+ def __len__(self):
+ return 1
+
+
+class ExampleModel(nn.Module):
+
+ def __init__(self):
+ super(ExampleModel, self).__init__()
+ self.test_cfg = None
+ self.conv = nn.Conv2d(3, 3, 3)
+
+ def forward(self, img, img_metas, test_mode=False, **kwargs):
+ return img
+
+ def train_step(self, data_batch, optimizer):
+ loss = self.forward(**data_batch)
+ return dict(loss=loss)
+
+
+def test_eval_hook():
+ with pytest.raises(TypeError):
+ test_dataset = ExampleModel()
+ data_loader = [
+ DataLoader(
+ test_dataset,
+ batch_size=1,
+ sampler=None,
+ num_worker=0,
+ shuffle=False)
+ ]
+ EvalHook(data_loader)
+
+ test_dataset = ExampleDataset()
+ test_dataset.evaluate = MagicMock(return_value=dict(test='success'))
+ loader = DataLoader(test_dataset, batch_size=1)
+ model = ExampleModel()
+ data_loader = DataLoader(
+ test_dataset, batch_size=1, sampler=None, num_workers=0, shuffle=False)
+ optim_cfg = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+ optimizer = obj_from_dict(optim_cfg, torch.optim,
+ dict(params=model.parameters()))
+
+ # test EvalHook
+ with tempfile.TemporaryDirectory() as tmpdir:
+ eval_hook = EvalHook(data_loader)
+ runner = mmcv.runner.IterBasedRunner(
+ model=model,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logging.getLogger())
+ runner.register_hook(eval_hook)
+ runner.run([loader], [('train', 1)], 1)
+ test_dataset.evaluate.assert_called_with([torch.tensor([1])],
+ logger=runner.logger)
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
+ results = single_gpu_test(model, data_loader)
+ return results
+
+
+@patch('mmseg.apis.multi_gpu_test', multi_gpu_test)
+def test_dist_eval_hook():
+ with pytest.raises(TypeError):
+ test_dataset = ExampleModel()
+ data_loader = [
+ DataLoader(
+ test_dataset,
+ batch_size=1,
+ sampler=None,
+ num_worker=0,
+ shuffle=False)
+ ]
+ DistEvalHook(data_loader)
+
+ test_dataset = ExampleDataset()
+ test_dataset.evaluate = MagicMock(return_value=dict(test='success'))
+ loader = DataLoader(test_dataset, batch_size=1)
+ model = ExampleModel()
+ data_loader = DataLoader(
+ test_dataset, batch_size=1, sampler=None, num_workers=0, shuffle=False)
+ optim_cfg = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+ optimizer = obj_from_dict(optim_cfg, torch.optim,
+ dict(params=model.parameters()))
+
+ # test DistEvalHook
+ with tempfile.TemporaryDirectory() as tmpdir:
+ eval_hook = DistEvalHook(data_loader)
+ runner = mmcv.runner.IterBasedRunner(
+ model=model,
+ optimizer=optimizer,
+ work_dir=tmpdir,
+ logger=logging.getLogger())
+ runner.register_hook(eval_hook)
+ runner.run([loader], [('train', 1)], 1)
+ test_dataset.evaluate.assert_called_with([torch.tensor([1])],
+ logger=runner.logger)
diff --git a/tests/test_mean_iou.py b/tests/test_mean_iou.py
new file mode 100644
index 0000000000..48a3df8e4c
--- /dev/null
+++ b/tests/test_mean_iou.py
@@ -0,0 +1,56 @@
+import numpy as np
+
+from mmseg.core.evaluation import mean_iou
+
+
+def get_confusion_matrix(pred_label, label, num_classes, ignore_index):
+ """Intersection over Union
+ Args:
+ pred_label (np.ndarray): 2D predict map
+ label (np.ndarray): label 2D label map
+ num_classes (int): number of categories
+ ignore_index (int): index ignore in evaluation
+ """
+
+ mask = (label != ignore_index)
+ pred_label = pred_label[mask]
+ label = label[mask]
+
+ n = num_classes
+ inds = n * label + pred_label
+
+ mat = np.bincount(inds, minlength=n**2).reshape(n, n)
+
+ return mat
+
+
+# This func is deprecated since it's not memory efficient
+def legacy_mean_iou(results, gt_seg_maps, num_classes, ignore_index):
+ num_imgs = len(results)
+ assert len(gt_seg_maps) == num_imgs
+ total_mat = np.zeros((num_classes, num_classes), dtype=np.float)
+ for i in range(num_imgs):
+ mat = get_confusion_matrix(
+ results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
+ total_mat += mat
+ all_acc = np.diag(total_mat).sum() / total_mat.sum()
+ acc = np.diag(total_mat) / total_mat.sum(axis=1)
+ iou = np.diag(total_mat) / (
+ total_mat.sum(axis=1) + total_mat.sum(axis=0) - np.diag(total_mat))
+
+ return all_acc, acc, iou
+
+
+def test_mean_iou():
+ """Compare ``mean_iou`` with the legacy confusion-matrix implementation."""
+ pred_size = (10, 30, 30)
+ num_classes = 19
+ ignore_index = 255
+ # random predictions and labels drawn from [0, num_classes)
+ results = np.random.randint(0, num_classes, size=pred_size)
+ label = np.random.randint(0, num_classes, size=pred_size)
+ # mark a strip of every label map as ignored
+ label[:, 2, 5:10] = ignore_index
+ all_acc, acc, iou = mean_iou(results, label, num_classes, ignore_index)
+ all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes,
+ ignore_index)
+ assert all_acc == all_acc_l
+ assert np.allclose(acc, acc_l)
+ assert np.allclose(iou, iou_l)
diff --git a/tests/test_models/test_backbone.py b/tests/test_models/test_backbone.py
new file mode 100644
index 0000000000..00ae43d009
--- /dev/null
+++ b/tests/test_models/test_backbone.py
@@ -0,0 +1,666 @@
+import pytest
+import torch
+from mmcv.ops import DeformConv2dPack
+from mmcv.utils.parrots_wrapper import _BatchNorm
+from torch.nn.modules import AvgPool2d, GroupNorm
+
+from mmseg.models.backbones import ResNet, ResNetV1d, ResNeXt
+from mmseg.models.backbones.resnet import BasicBlock, Bottleneck
+from mmseg.models.backbones.resnext import Bottleneck as BottleneckX
+from mmseg.models.utils import ResLayer
+
+
+def is_block(modules):
+ """Check if is ResNet building block."""
+ if isinstance(modules, (BasicBlock, Bottleneck, BottleneckX)):
+ return True
+ return False
+
+
+def is_norm(modules):
+ """Check if is one of the norms."""
+ if isinstance(modules, (GroupNorm, _BatchNorm)):
+ return True
+ return False
+
+
+def all_zeros(modules):
+ """Check if the weight(and bias) is all zero."""
+ weight_zero = torch.allclose(modules.weight.data,
+ torch.zeros_like(modules.weight.data))
+ if hasattr(modules, 'bias'):
+ bias_zero = torch.allclose(modules.bias.data,
+ torch.zeros_like(modules.bias.data))
+ else:
+ bias_zero = True
+
+ return weight_zero and bias_zero
+
+
+def check_norm_state(modules, train_state):
+ """Check if norm layer is in correct train state."""
+ for mod in modules:
+ if isinstance(mod, _BatchNorm):
+ if mod.training != train_state:
+ return False
+ return True
+
+
+def test_resnet_basic_block():
+ """Check ``BasicBlock`` argument validation, structure and forward shape."""
+
+ with pytest.raises(AssertionError):
+ # Not implemented yet.
+ dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)
+ BasicBlock(64, 64, dcn=dcn)
+
+ with pytest.raises(AssertionError):
+ # Not implemented yet.
+ plugins = [
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16),
+ position='after_conv3')
+ ]
+ BasicBlock(64, 64, plugins=plugins)
+
+ with pytest.raises(AssertionError):
+ # Not implemented yet
+ plugins = [
+ dict(
+ cfg=dict(
+ type='GeneralizedAttention',
+ spatial_range=-1,
+ num_heads=8,
+ attention_type='0010',
+ kv_stride=2),
+ position='after_conv2')
+ ]
+ BasicBlock(64, 64, plugins=plugins)
+
+ # Test BasicBlock with checkpoint forward
+ block = BasicBlock(16, 16, with_cp=True)
+ assert block.with_cp
+ x = torch.randn(1, 16, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 16, 56, 56])
+
+ # test BasicBlock structure and forward
+ block = BasicBlock(64, 64)
+ assert block.conv1.in_channels == 64
+ assert block.conv1.out_channels == 64
+ assert block.conv1.kernel_size == (3, 3)
+ assert block.conv2.in_channels == 64
+ assert block.conv2.out_channels == 64
+ assert block.conv2.kernel_size == (3, 3)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_resnet_bottleneck():
+ """Check ``Bottleneck`` validation, stride style, DCN, plugins and forward."""
+
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ Bottleneck(64, 64, style='tensorflow')
+
+ with pytest.raises(AssertionError):
+ # Allowed positions are 'after_conv1', 'after_conv2', 'after_conv3'
+ plugins = [
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16),
+ position='after_conv4')
+ ]
+ Bottleneck(64, 16, plugins=plugins)
+
+ with pytest.raises(AssertionError):
+ # Need to specify different postfix to avoid duplicate plugin name
+ plugins = [
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16),
+ position='after_conv3'),
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16),
+ position='after_conv3')
+ ]
+ Bottleneck(64, 16, plugins=plugins)
+
+ with pytest.raises(KeyError):
+ # Plugin type is not supported
+ plugins = [dict(cfg=dict(type='WrongPlugin'), position='after_conv3')]
+ Bottleneck(64, 16, plugins=plugins)
+
+ # Test Bottleneck with checkpoint forward
+ block = Bottleneck(64, 16, with_cp=True)
+ assert block.with_cp
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test Bottleneck style
+ # ('pytorch' puts the stride on conv2, 'caffe' puts it on conv1)
+ block = Bottleneck(64, 64, stride=2, style='pytorch')
+ assert block.conv1.stride == (1, 1)
+ assert block.conv2.stride == (2, 2)
+ block = Bottleneck(64, 64, stride=2, style='caffe')
+ assert block.conv1.stride == (2, 2)
+ assert block.conv2.stride == (1, 1)
+
+ # Test Bottleneck DCN
+ dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)
+ with pytest.raises(AssertionError):
+ Bottleneck(64, 64, dcn=dcn, conv_cfg=dict(type='Conv'))
+ block = Bottleneck(64, 64, dcn=dcn)
+ assert isinstance(block.conv2, DeformConv2dPack)
+
+ # Test Bottleneck forward
+ block = Bottleneck(64, 16)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test Bottleneck with 1 ContextBlock after conv3
+ plugins = [
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16),
+ position='after_conv3')
+ ]
+ block = Bottleneck(64, 16, plugins=plugins)
+ assert block.context_block.in_channels == 64
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test Bottleneck with 1 GeneralizedAttention after conv2
+ plugins = [
+ dict(
+ cfg=dict(
+ type='GeneralizedAttention',
+ spatial_range=-1,
+ num_heads=8,
+ attention_type='0010',
+ kv_stride=2),
+ position='after_conv2')
+ ]
+ block = Bottleneck(64, 16, plugins=plugins)
+ assert block.gen_attention_block.in_channels == 16
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test Bottleneck with 1 GeneralizedAttention after conv2, 1 NonLocal2d
+ # after conv2, 1 ContextBlock after conv3
+ plugins = [
+ dict(
+ cfg=dict(
+ type='GeneralizedAttention',
+ spatial_range=-1,
+ num_heads=8,
+ attention_type='0010',
+ kv_stride=2),
+ position='after_conv2'),
+ dict(cfg=dict(type='NonLocal2d'), position='after_conv2'),
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16),
+ position='after_conv3')
+ ]
+ block = Bottleneck(64, 16, plugins=plugins)
+ assert block.gen_attention_block.in_channels == 16
+ assert block.nonlocal_block.in_channels == 16
+ assert block.context_block.in_channels == 64
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test Bottleneck with 1 ContextBlock after conv2, 2 ContextBlock after
+ # conv3
+ # (distinct postfix values keep the three plugin names unique)
+ plugins = [
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=1),
+ position='after_conv2'),
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=2),
+ position='after_conv3'),
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=3),
+ position='after_conv3')
+ ]
+ block = Bottleneck(64, 16, plugins=plugins)
+ assert block.context_block1.in_channels == 16
+ assert block.context_block2.in_channels == 64
+ assert block.context_block3.in_channels == 64
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_resnet_res_layer():
+ """Check ``ResLayer`` downsampling, stride, dilation and output shapes."""
+ # Test ResLayer of 3 Bottleneck w/o downsample
+ layer = ResLayer(Bottleneck, 64, 16, 3)
+ assert len(layer) == 3
+ assert layer[0].conv1.in_channels == 64
+ assert layer[0].conv1.out_channels == 16
+ for i in range(1, len(layer)):
+ assert layer[i].conv1.in_channels == 64
+ assert layer[i].conv1.out_channels == 16
+ for i in range(len(layer)):
+ assert layer[i].downsample is None
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test ResLayer of 3 Bottleneck with downsample
+ layer = ResLayer(Bottleneck, 64, 64, 3)
+ assert layer[0].downsample[0].out_channels == 256
+ for i in range(1, len(layer)):
+ assert layer[i].downsample is None
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 256, 56, 56])
+
+ # Test ResLayer of 3 Bottleneck with stride=2
+ layer = ResLayer(Bottleneck, 64, 64, 3, stride=2)
+ assert layer[0].downsample[0].out_channels == 256
+ assert layer[0].downsample[0].stride == (2, 2)
+ for i in range(1, len(layer)):
+ assert layer[i].downsample is None
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 256, 28, 28])
+
+ # Test ResLayer of 3 Bottleneck with stride=2 and average downsample
+ layer = ResLayer(Bottleneck, 64, 64, 3, stride=2, avg_down=True)
+ assert isinstance(layer[0].downsample[0], AvgPool2d)
+ assert layer[0].downsample[1].out_channels == 256
+ assert layer[0].downsample[1].stride == (1, 1)
+ for i in range(1, len(layer)):
+ assert layer[i].downsample is None
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 256, 28, 28])
+
+ # Test ResLayer of 3 Bottleneck with dilation=2
+ layer = ResLayer(Bottleneck, 64, 16, 3, dilation=2)
+ for i in range(len(layer)):
+ assert layer[i].conv2.dilation == (2, 2)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test ResLayer of 3 Bottleneck with dilation=2, contract_dilation=True
+ # (only the first block is expected to fall back to dilation 1)
+ layer = ResLayer(Bottleneck, 64, 16, 3, dilation=2, contract_dilation=True)
+ assert layer[0].conv2.dilation == (1, 1)
+ for i in range(1, len(layer)):
+ assert layer[i].conv2.dilation == (2, 2)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+ # Test ResLayer of 3 Bottleneck with dilation=2, multi_grid
+ # (each block is expected to take its dilation from multi_grid)
+ layer = ResLayer(Bottleneck, 64, 16, 3, dilation=2, multi_grid=(1, 2, 4))
+ assert layer[0].conv2.dilation == (1, 1)
+ assert layer[1].conv2.dilation == (2, 2)
+ assert layer[2].conv2.dilation == (4, 4)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = layer(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_resnet_backbone():
+ """Test resnet backbone."""
+ with pytest.raises(KeyError):
+ # ResNet depth should be in [18, 34, 50, 101, 152]
+ ResNet(20)
+
+ with pytest.raises(AssertionError):
+ # In ResNet: 1 <= num_stages <= 4
+ ResNet(50, num_stages=0)
+
+ with pytest.raises(AssertionError):
+ # len(stage_with_dcn) == num_stages
+ dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)
+ ResNet(50, dcn=dcn, stage_with_dcn=(True, ))
+
+ with pytest.raises(AssertionError):
+ # len(stage_with_plugin) == num_stages
+ plugins = [
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16),
+ stages=(False, True, True),
+ position='after_conv3')
+ ]
+ ResNet(50, plugins=plugins)
+
+ with pytest.raises(AssertionError):
+ # In ResNet: 1 <= num_stages <= 4
+ ResNet(50, num_stages=5)
+
+ with pytest.raises(AssertionError):
+ # len(strides) == len(dilations) == num_stages
+ ResNet(50, strides=(1, ), dilations=(1, 1), num_stages=3)
+
+ with pytest.raises(TypeError):
+ # pretrained must be a string path
+ model = ResNet(50)
+ model.init_weights(pretrained=0)
+
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ ResNet(50, style='tensorflow')
+
+ # Test ResNet50 norm_eval=True
+ model = ResNet(50, norm_eval=True)
+ model.init_weights()
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test ResNet50 with torchvision pretrained weight
+ model = ResNet(depth=50, norm_eval=True)
+ model.init_weights('torchvision://resnet50')
+ model.train()
+ assert check_norm_state(model.modules(), False)
+
+ # Test ResNet50 with first stage frozen
+ frozen_stages = 1
+ model = ResNet(50, frozen_stages=frozen_stages)
+ model.init_weights()
+ model.train()
+ assert model.norm1.training is False
+ for layer in [model.conv1, model.norm1]:
+ for param in layer.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, 'layer{}'.format(i))
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test ResNet50V1d with first stage frozen
+ model = ResNetV1d(depth=50, frozen_stages=frozen_stages)
+ assert len(model.stem) == 9
+ model.init_weights()
+ model.train()
+ check_norm_state(model.stem, False)
+ for param in model.stem.parameters():
+ assert param.requires_grad is False
+ for i in range(1, frozen_stages + 1):
+ layer = getattr(model, 'layer{}'.format(i))
+ for mod in layer.modules():
+ if isinstance(mod, _BatchNorm):
+ assert mod.training is False
+ for param in layer.parameters():
+ assert param.requires_grad is False
+
+ # Test ResNet18 forward
+ model = ResNet(18)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 64, 56, 56])
+ assert feat[1].shape == torch.Size([1, 128, 28, 28])
+ assert feat[2].shape == torch.Size([1, 256, 14, 14])
+ assert feat[3].shape == torch.Size([1, 512, 7, 7])
+
+ # Test ResNet50 with BatchNorm forward
+ model = ResNet(50)
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, _BatchNorm)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test ResNet50 with layers 1, 2, 3 out forward
+ model = ResNet(50, out_indices=(0, 1, 2))
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+
+ # Test ResNet18 with checkpoint forward
+ model = ResNet(18, with_cp=True)
+ for m in model.modules():
+ if is_block(m):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 64, 56, 56])
+ assert feat[1].shape == torch.Size([1, 128, 28, 28])
+ assert feat[2].shape == torch.Size([1, 256, 14, 14])
+ assert feat[3].shape == torch.Size([1, 512, 7, 7])
+
+ # Test ResNet50 with checkpoint forward
+ model = ResNet(50, with_cp=True)
+ for m in model.modules():
+ if is_block(m):
+ assert m.with_cp
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test ResNet50 with GroupNorm forward
+ model = ResNet(
+ 50, norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))
+ for m in model.modules():
+ if is_norm(m):
+ assert isinstance(m, GroupNorm)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test ResNet50 with 1 GeneralizedAttention after conv2, 1 NonLocal2d
+ # after conv2, 1 ContextBlock after conv3 in layers 2, 3, 4
+ plugins = [
+ dict(
+ cfg=dict(
+ type='GeneralizedAttention',
+ spatial_range=-1,
+ num_heads=8,
+ attention_type='0010',
+ kv_stride=2),
+ stages=(False, True, True, True),
+ position='after_conv2'),
+ dict(cfg=dict(type='NonLocal2d'), position='after_conv2'),
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16),
+ stages=(False, True, True, False),
+ position='after_conv3')
+ ]
+ model = ResNet(50, plugins=plugins)
+ for m in model.layer1.modules():
+ if is_block(m):
+ assert not hasattr(m, 'context_block')
+ assert not hasattr(m, 'gen_attention_block')
+ assert m.nonlocal_block.in_channels == 64
+ for m in model.layer2.modules():
+ if is_block(m):
+ assert m.nonlocal_block.in_channels == 128
+ assert m.gen_attention_block.in_channels == 128
+ assert m.context_block.in_channels == 512
+
+ for m in model.layer3.modules():
+ if is_block(m):
+ assert m.nonlocal_block.in_channels == 256
+ assert m.gen_attention_block.in_channels == 256
+ assert m.context_block.in_channels == 1024
+
+ for m in model.layer4.modules():
+ if is_block(m):
+ assert m.nonlocal_block.in_channels == 512
+ assert m.gen_attention_block.in_channels == 512
+ assert not hasattr(m, 'context_block')
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test ResNet50 with 1 ContextBlock after conv2, 1 ContextBlock after
+ # conv3 in layers 2, 3, 4
+ plugins = [
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=1),
+ stages=(False, True, True, False),
+ position='after_conv3'),
+ dict(
+ cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=2),
+ stages=(False, True, True, False),
+ position='after_conv3')
+ ]
+
+ model = ResNet(50, plugins=plugins)
+ for m in model.layer1.modules():
+ if is_block(m):
+ assert not hasattr(m, 'context_block')
+ assert not hasattr(m, 'context_block1')
+ assert not hasattr(m, 'context_block2')
+ for m in model.layer2.modules():
+ if is_block(m):
+ assert not hasattr(m, 'context_block')
+ assert m.context_block1.in_channels == 512
+ assert m.context_block2.in_channels == 512
+
+ for m in model.layer3.modules():
+ if is_block(m):
+ assert not hasattr(m, 'context_block')
+ assert m.context_block1.in_channels == 1024
+ assert m.context_block2.in_channels == 1024
+
+ for m in model.layer4.modules():
+ if is_block(m):
+ assert not hasattr(m, 'context_block')
+ assert not hasattr(m, 'context_block1')
+ assert not hasattr(m, 'context_block2')
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test ResNet50 zero initialization of residual
+ model = ResNet(50, zero_init_residual=True)
+ model.init_weights()
+ for m in model.modules():
+ if isinstance(m, Bottleneck):
+ assert all_zeros(m.norm3)
+ elif isinstance(m, BasicBlock):
+ assert all_zeros(m.norm2)
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+ # Test ResNetV1d forward
+ model = ResNetV1d(depth=50)
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+
+def test_renext_bottleneck():
+ """Check the ResNeXt ``Bottleneck`` validation, structure, DCN and forward."""
+ with pytest.raises(AssertionError):
+ # Style must be in ['pytorch', 'caffe']
+ BottleneckX(64, 64, groups=32, base_width=4, style='tensorflow')
+
+ # Test ResNeXt Bottleneck structure
+ block = BottleneckX(
+ 64, 64, groups=32, base_width=4, stride=2, style='pytorch')
+ assert block.conv2.stride == (2, 2)
+ assert block.conv2.groups == 32
+ assert block.conv2.out_channels == 128
+
+ # Test ResNeXt Bottleneck with DCN
+ dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)
+ with pytest.raises(AssertionError):
+ # conv_cfg must be None if dcn is not None
+ BottleneckX(
+ 64,
+ 64,
+ groups=32,
+ base_width=4,
+ dcn=dcn,
+ conv_cfg=dict(type='Conv'))
+ # construction with dcn alone is only required not to raise; nothing is
+ # asserted on the resulting block
+ BottleneckX(64, 64, dcn=dcn)
+
+ # Test ResNeXt Bottleneck forward
+ block = BottleneckX(64, 16, groups=32, base_width=4)
+ x = torch.randn(1, 64, 56, 56)
+ x_out = block(x)
+ assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_resnext_backbone():
+ with pytest.raises(KeyError):
+ # ResNeXt depth should be in [50, 101, 152]
+ ResNeXt(depth=18)
+
+ # Test ResNeXt with group 32, base_width 4
+ model = ResNeXt(depth=50, groups=32, base_width=4)
+ print(model)
+ for m in model.modules():
+ if is_block(m):
+ assert m.conv2.groups == 32
+ model.init_weights()
+ model.train()
+
+ imgs = torch.randn(1, 3, 224, 224)
+ feat = model(imgs)
+ assert len(feat) == 4
+ assert feat[0].shape == torch.Size([1, 256, 56, 56])
+ assert feat[1].shape == torch.Size([1, 512, 28, 28])
+ assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+ assert feat[3].shape == torch.Size([1, 2048, 7, 7])
diff --git a/tests/test_models/test_forward.py b/tests/test_models/test_forward.py
new file mode 100644
index 0000000000..620b82e64d
--- /dev/null
+++ b/tests/test_models/test_forward.py
@@ -0,0 +1,229 @@
+"""pytest tests/test_forward.py."""
+import copy
+from os.path import dirname, exists, join
+from unittest.mock import patch
+
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+from mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm
+
+
+def _demo_mm_inputs(input_shape=(2, 3, 8, 16), num_classes=10):
+    """Create a superset of inputs needed to run test or train batches.
+
+    Args:
+        input_shape (tuple): Input batch dimensions, unpacked as
+            ``(N, C, H, W)``.
+        num_classes (int): Number of semantic classes. Labels are drawn
+            from ``[0, num_classes)``.
+
+    Returns:
+        dict: ``imgs`` (float tensor shaped like ``input_shape``),
+        ``img_metas`` (list of ``N`` meta dicts) and ``gt_semantic_seg``
+        (long tensor of shape ``(N, 1, H, W)``).
+    """
+    (N, C, H, W) = input_shape
+
+    # A fixed seed keeps the generated batch reproducible.
+    rng = np.random.RandomState(0)
+
+    imgs = rng.rand(*input_shape)
+    # ``high`` is exclusive in ``RandomState.randint``; passing
+    # ``num_classes`` lets the last class id appear and also makes
+    # ``num_classes=1`` valid. Labels pass through uint8, so values above
+    # 255 would wrap.
+    segs = rng.randint(
+        low=0, high=num_classes, size=(N, 1, H, W)).astype(np.uint8)
+
+    img_metas = [{
+        'img_shape': (H, W, C),
+        'ori_shape': (H, W, C),
+        'pad_shape': (H, W, C),
+        'filename': '.png',
+        'scale_factor': 1.0,
+        'flip': False,
+        'flip_direction': 'horizontal'
+    } for _ in range(N)]
+
+    mm_inputs = {
+        'imgs': torch.FloatTensor(imgs),
+        'img_metas': img_metas,
+        'gt_semantic_seg': torch.LongTensor(segs)
+    }
+    return mm_inputs
+
+
+def _get_config_directory():
+    """Find the predefined segmentor config directory.
+
+    Returns:
+        str: Path of the ``configs`` directory under the repository root.
+
+    Raises:
+        Exception: If the ``configs`` directory does not exist.
+    """
+    try:
+        # Assume we are running in the source mmsegmentation repo. This file
+        # lives in tests/test_models/, so three ``dirname`` calls reach the
+        # repository root.
+        repo_dpath = dirname(dirname(dirname(__file__)))
+    except NameError:
+        # For IPython development when this __file__ is not defined
+        import mmseg
+        # ``mmseg.__file__`` is the package's ``__init__.py``; presumably the
+        # package sits directly under the repo root, so two ``dirname``
+        # calls are enough (three would land one level above the repo).
+        repo_dpath = dirname(dirname(mmseg.__file__))
+    config_dpath = join(repo_dpath, 'configs')
+    if not exists(config_dpath):
+        raise Exception('Cannot find config path')
+    return config_dpath
+
+
+def _get_config_module(fname):
+    """Load the config file ``fname``, resolved against the config directory."""
+    from mmcv import Config
+    cfg_path = join(_get_config_directory(), fname)
+    return Config.fromfile(cfg_path)
+
+
+def _get_segmentor_cfg(fname):
+    """Return ``(model, train_cfg, test_cfg)`` taken from config ``fname``.
+
+    Every piece is deep copied so a test may modify it without affecting
+    other tests; the train and test configs are wrapped in ``mmcv.Config``.
+    """
+    import mmcv
+    loaded = _get_config_module(fname)
+
+    def _as_config(section):
+        return mmcv.Config(copy.deepcopy(section))
+
+    model_cfg = copy.deepcopy(loaded.model)
+    return model_cfg, _as_config(loaded.train_cfg), _as_config(
+        loaded.test_cfg)
+
+
+def test_pspnet_forward():
+    """Run the shared encoder-decoder forward check on the PSPNet config."""
+    cfg_file = 'pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_fcn_forward():
+    """Run the shared encoder-decoder forward check on the FCN config."""
+    cfg_file = 'fcn/fcn_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_deeplabv3_forward():
+    """Run the shared encoder-decoder forward check on the DeepLabV3 config."""
+    cfg_file = 'deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_deeplabv3plus_forward():
+    """Run the shared encoder-decoder forward check on the DeepLabV3+ config."""
+    cfg_file = 'deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_gcnet_forward():
+    """Run the shared encoder-decoder forward check on the GCNet config."""
+    cfg_file = 'gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_ann_forward():
+    """Run the shared encoder-decoder forward check on the ANN config."""
+    cfg_file = 'ann/ann_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_ccnet_forward():
+    """Run the shared forward check on the CCNet config; skip without CUDA."""
+    cuda_ready = torch.cuda.is_available()
+    if not cuda_ready:
+        pytest.skip('CCNet requires CUDA')
+    cfg_file = 'ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_danet_forward():
+    """Run the shared encoder-decoder forward check on the DANet config."""
+    cfg_file = 'danet/danet_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_nonlocal_net_forward():
+    """Run the shared encoder-decoder forward check on the non-local config."""
+    cfg_file = 'nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_upernet_forward():
+    """Run the shared encoder-decoder forward check on the UPerNet config."""
+    cfg_file = 'upernet/upernet_r50_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_hrnet_forward():
+    """Run the shared encoder-decoder forward check on the HRNet FCN config."""
+    cfg_file = 'hrnet/fcn_hr18s_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_ocrnet_forward():
+    """Run the shared encoder-decoder forward check on the OCRNet config."""
+    cfg_file = 'ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_psanet_forward():
+    """Run the shared encoder-decoder forward check on the PSANet config."""
+    cfg_file = 'psanet/psanet_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def test_encnet_forward():
+    """Run the shared encoder-decoder forward check on the EncNet config."""
+    cfg_file = 'encnet/encnet_r50-d8_512x1024_40k_cityscapes.py'
+    _test_encoder_decoder_forward(cfg_file)
+
+
+def get_world_size(process_group):
+    """Report a world size of 1 regardless of ``process_group``.
+
+    Patched over ``torch.distributed.get_world_size`` by the forward test.
+    """
+    single_process = 1
+    return single_process
+
+
+def _check_input_dim(self, inputs):
+    """Accept any input; no-op patched over ``_BatchNorm._check_input_dim``."""
+    return None
+
+
+def _convert_batchnorm(module):
+ """Recursively replace SyncBatchNorm modules with ``_BatchNorm`` copies.
+
+ Args:
+ module: Module to convert; its children are converted recursively.
+
+ Returns:
+ The converted module; ``module`` itself unless it is a SyncBatchNorm.
+ """
+ # NOTE(review): indentation is lost in this view. Presumably the weight and
+ # bias copies belong to the ``module.affine`` branch and the running
+ # statistics copies to the SyncBatchNorm branch -- confirm.
+ module_output = module
+ if isinstance(module, SyncBatchNorm):
+ # to be consistent with SyncBN, we hack dim check function in BN
+ module_output = _BatchNorm(module.num_features, module.eps,
+ module.momentum, module.affine,
+ module.track_running_stats)
+ if module.affine:
+ module_output.weight.data = module.weight.data.clone().detach()
+ module_output.bias.data = module.bias.data.clone().detach()
+ # keep requires_grad unchanged
+ module_output.weight.requires_grad = module.weight.requires_grad
+ module_output.bias.requires_grad = module.bias.requires_grad
+ module_output.running_mean = module.running_mean
+ module_output.running_var = module.running_var
+ module_output.num_batches_tracked = module.num_batches_tracked
+ # convert each child and register the result under the same name
+ for name, child in module.named_children():
+ module_output.add_module(name, _convert_batchnorm(child))
+ # drops only the local name; the caller's reference is unaffected
+ del module
+ return module_output
+
+
+@patch('torch.nn.modules.batchnorm._BatchNorm._check_input_dim',
+ _check_input_dim)
+@patch('torch.distributed.get_world_size', get_world_size)
+def _test_encoder_decoder_forward(cfg_file):
+ """Build the segmentor described by ``cfg_file`` and run both forward modes.
+
+ Args:
+ cfg_file (str): Config path relative to the config directory.
+ """
+ model, train_cfg, test_cfg = _get_segmentor_cfg(cfg_file)
+ # no pretrained weights; always test in 'whole' mode
+ model['pretrained'] = None
+ test_cfg['mode'] = 'whole'
+
+ from mmseg.models import build_segmentor
+ segmentor = build_segmentor(model, train_cfg=train_cfg, test_cfg=test_cfg)
+
+ # with several decode heads, the last one supplies the class count
+ if isinstance(segmentor.decode_head, nn.ModuleList):
+ num_classes = segmentor.decode_head[-1].num_classes
+ else:
+ num_classes = segmentor.decode_head.num_classes
+ # batch_size=2 for BatchNorm
+ input_shape = (2, 3, 32, 32)
+ mm_inputs = _demo_mm_inputs(input_shape, num_classes=num_classes)
+
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+ gt_semantic_seg = mm_inputs['gt_semantic_seg']
+
+ # convert to cuda Tensor if applicable
+ # without CUDA, SyncBatchNorm layers are swapped via _convert_batchnorm
+ if torch.cuda.is_available():
+ segmentor = segmentor.cuda()
+ imgs = imgs.cuda()
+ gt_semantic_seg = gt_semantic_seg.cuda()
+ else:
+ segmentor = _convert_batchnorm(segmentor)
+
+ # Test forward train
+ losses = segmentor.forward(
+ imgs, img_metas, gt_semantic_seg=gt_semantic_seg, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ segmentor.eval()
+ # pack into lists
+ # each image regains a leading batch axis; each meta is wrapped in a list
+ img_list = [img[None, :] for img in imgs]
+ img_meta_list = [[img_meta] for img_meta in img_metas]
+ segmentor.forward(img_list, img_meta_list, return_loss=False)
diff --git a/tests/test_models/test_heads.py b/tests/test_models/test_heads.py
new file mode 100644
index 0000000000..935239438f
--- /dev/null
+++ b/tests/test_models/test_heads.py
@@ -0,0 +1,541 @@
+from unittest.mock import patch
+
+import pytest
+import torch
+from mmcv.cnn import ConvModule
+from mmcv.utils.parrots_wrapper import SyncBatchNorm
+
+from mmseg.models.decode_heads import (ANNHead, ASPPHead, CCHead, DAHead,
+ DepthwiseSeparableASPPHead, EncHead,
+ FCNHead, GCHead, NLHead, OCRHead,
+ PSAHead, PSPHead, UPerHead)
+from mmseg.models.decode_heads.decode_head import BaseDecodeHead
+
+
+def _conv_has_norm(module, sync_bn):
+    """Tell whether every ConvModule inside ``module`` has a norm layer.
+
+    When ``sync_bn`` is true, each norm layer must also be a SyncBatchNorm.
+    Returns True when ``module`` contains no ConvModule at all.
+    """
+    conv_modules = (m for m in module.modules() if isinstance(m, ConvModule))
+    for conv in conv_modules:
+        if not conv.with_norm:
+            return False
+        if sync_bn and not isinstance(conv.bn, SyncBatchNorm):
+            return False
+    return True
+
+
+def to_cuda(module, data):
+    """Move ``module`` and, when ``data`` is a list, each of its items to CUDA.
+
+    A list is updated in place and returned; any other ``data`` is returned
+    untouched.
+    """
+    module = module.cuda()
+    if not isinstance(data, list):
+        return module, data
+    for idx, item in enumerate(data):
+        data[idx] = item.cuda()
+    return module, data
+
+
+@patch.multiple(BaseDecodeHead, __abstractmethods__=set())
+def test_decode_head():
+ """Check BaseDecodeHead argument validation, dropout and input transform."""
+ # the patch above empties __abstractmethods__ so the base class can be built
+
+ with pytest.raises(AssertionError):
+ # default input_transform doesn't accept multiple inputs
+ BaseDecodeHead([32, 16], 16, num_classes=19)
+
+ with pytest.raises(AssertionError):
+ # default input_transform doesn't accept multiple inputs
+ BaseDecodeHead(32, 16, num_classes=19, in_index=[-1, -2])
+
+ with pytest.raises(AssertionError):
+ # supported mode is resize_concat only
+ BaseDecodeHead(32, 16, num_classes=19, input_transform='concat')
+
+ with pytest.raises(AssertionError):
+ # in_channels should be list|tuple
+ BaseDecodeHead(32, 16, num_classes=19, input_transform='resize_concat')
+
+ with pytest.raises(AssertionError):
+ # in_index should be list|tuple
+ BaseDecodeHead([32],
+ 16,
+ in_index=-1,
+ num_classes=19,
+ input_transform='resize_concat')
+
+ with pytest.raises(AssertionError):
+ # len(in_index) should equal len(in_channels)
+ BaseDecodeHead([32, 16],
+ 16,
+ num_classes=19,
+ in_index=[-1],
+ input_transform='resize_concat')
+
+ # test default dropout
+ head = BaseDecodeHead(32, 16, num_classes=19)
+ assert hasattr(head, 'dropout') and head.dropout.p == 0.1
+
+ # test set dropout
+ head = BaseDecodeHead(32, 16, num_classes=19, drop_out_ratio=0.2)
+ assert hasattr(head, 'dropout') and head.dropout.p == 0.2
+
+ # test no input_transform
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = BaseDecodeHead(32, 16, num_classes=19)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert head.in_channels == 32
+ assert head.input_transform is None
+ transformed_inputs = head._transform_inputs(inputs)
+ assert transformed_inputs.shape == (1, 32, 45, 45)
+
+ # test input_transform = resize_concat
+ # channels add up (32 + 16 = 48); the asserted spatial size is 45x45
+ inputs = [torch.randn(1, 32, 45, 45), torch.randn(1, 16, 21, 21)]
+ head = BaseDecodeHead([32, 16],
+ 16,
+ num_classes=19,
+ in_index=[0, 1],
+ input_transform='resize_concat')
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert head.in_channels == 48
+ assert head.input_transform == 'resize_concat'
+ transformed_inputs = head._transform_inputs(inputs)
+ assert transformed_inputs.shape == (1, 48, 45, 45)
+
+
+def test_fcn_head():
+ """Check FCNHead validation, norm config, concat_input, kernel and convs."""
+
+ with pytest.raises(AssertionError):
+ # num_convs must be larger than 0
+ FCNHead(num_classes=19, num_convs=0)
+
+ # test no norm_cfg
+ head = FCNHead(in_channels=32, channels=16, num_classes=19)
+ for m in head.modules():
+ if isinstance(m, ConvModule):
+ assert not m.with_norm
+
+ # test with norm_cfg
+ head = FCNHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ norm_cfg=dict(type='SyncBN'))
+ for m in head.modules():
+ if isinstance(m, ConvModule):
+ assert m.with_norm and isinstance(m.bn, SyncBatchNorm)
+
+ # test concat_input=False
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = FCNHead(
+ in_channels=32, channels=16, num_classes=19, concat_input=False)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert len(head.convs) == 2
+ assert not head.concat_input and not hasattr(head, 'conv_cat')
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+ # test concat_input=True
+ # conv_cat is asserted to take 48 channels (32 input + 16 head channels)
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = FCNHead(
+ in_channels=32, channels=16, num_classes=19, concat_input=True)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert len(head.convs) == 2
+ assert head.concat_input
+ assert head.conv_cat.in_channels == 48
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+ # test kernel_size=3
+ # no kernel_size is passed here, so this exercises the default
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = FCNHead(in_channels=32, channels=16, num_classes=19)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ for i in range(len(head.convs)):
+ assert head.convs[i].kernel_size == (3, 3)
+ assert head.convs[i].padding == 1
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+ # test kernel_size=1
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = FCNHead(in_channels=32, channels=16, num_classes=19, kernel_size=1)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ for i in range(len(head.convs)):
+ assert head.convs[i].kernel_size == (1, 1)
+ assert head.convs[i].padding == 0
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+ # test num_conv
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = FCNHead(in_channels=32, channels=16, num_classes=19, num_convs=1)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert len(head.convs) == 1
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+
+def test_psp_head():
+ """Check PSPHead validation, norm config, pool scales and output shape."""
+
+ with pytest.raises(AssertionError):
+ # pool_scales must be list|tuple
+ PSPHead(in_channels=32, channels=16, num_classes=19, pool_scales=1)
+
+ # test no norm_cfg
+ head = PSPHead(in_channels=32, channels=16, num_classes=19)
+ assert not _conv_has_norm(head, sync_bn=False)
+
+ # test with norm_cfg
+ head = PSPHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ norm_cfg=dict(type='SyncBN'))
+ assert _conv_has_norm(head, sync_bn=True)
+
+ # each psp module's first layer must pool to the matching scale
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = PSPHead(
+ in_channels=32, channels=16, num_classes=19, pool_scales=(1, 2, 3))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert head.psp_modules[0][0].output_size == 1
+ assert head.psp_modules[1][0].output_size == 2
+ assert head.psp_modules[2][0].output_size == 3
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+
+def test_aspp_head():
+ """Check ASPPHead validation, norm config, dilations and output shape."""
+
+ with pytest.raises(AssertionError):
+ # dilations must be list|tuple
+ ASPPHead(in_channels=32, channels=16, num_classes=19, dilations=1)
+
+ # test no norm_cfg
+ head = ASPPHead(in_channels=32, channels=16, num_classes=19)
+ assert not _conv_has_norm(head, sync_bn=False)
+
+ # test with norm_cfg
+ head = ASPPHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ norm_cfg=dict(type='SyncBN'))
+ assert _conv_has_norm(head, sync_bn=True)
+
+ # each aspp module's conv must use the matching dilation
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = ASPPHead(
+ in_channels=32, channels=16, num_classes=19, dilations=(1, 12, 24))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert head.aspp_modules[0].conv.dilation == (1, 1)
+ assert head.aspp_modules[1].conv.dilation == (12, 12)
+ assert head.aspp_modules[2].conv.dilation == (24, 24)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+
+def test_psa_head():
+ """Check PSAHead validation, norm config and output shape per psa_type."""
+ # every forward case uses 39x39 inputs, equal to mask_size=(39, 39)
+
+ with pytest.raises(AssertionError):
+ # psa_type must be in 'bi-direction', 'collect', 'distribute'
+ PSAHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ mask_size=(39, 39),
+ psa_type='gather')
+
+ # test no norm_cfg
+ head = PSAHead(
+ in_channels=32, channels=16, num_classes=19, mask_size=(39, 39))
+ assert not _conv_has_norm(head, sync_bn=False)
+
+ # test with norm_cfg
+ head = PSAHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ mask_size=(39, 39),
+ norm_cfg=dict(type='SyncBN'))
+ assert _conv_has_norm(head, sync_bn=True)
+
+ # test 'bi-direction' psa_type
+ # psa_type is not passed here, so this exercises the default
+ inputs = [torch.randn(1, 32, 39, 39)]
+ head = PSAHead(
+ in_channels=32, channels=16, num_classes=19, mask_size=(39, 39))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 39, 39)
+
+ # test 'bi-direction' psa_type, shrink_factor=1
+ inputs = [torch.randn(1, 32, 39, 39)]
+ head = PSAHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ mask_size=(39, 39),
+ shrink_factor=1)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 39, 39)
+
+ # test 'bi-direction' psa_type with soft_max
+ inputs = [torch.randn(1, 32, 39, 39)]
+ head = PSAHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ mask_size=(39, 39),
+ psa_softmax=True)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 39, 39)
+
+ # test 'collect' psa_type
+ inputs = [torch.randn(1, 32, 39, 39)]
+ head = PSAHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ mask_size=(39, 39),
+ psa_type='collect')
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 39, 39)
+
+ # test 'collect' psa_type, shrink_factor=1
+ inputs = [torch.randn(1, 32, 39, 39)]
+ head = PSAHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ mask_size=(39, 39),
+ shrink_factor=1,
+ psa_type='collect')
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 39, 39)
+
+ # test 'collect' psa_type, shrink_factor=1, compact=True
+ inputs = [torch.randn(1, 32, 39, 39)]
+ head = PSAHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ mask_size=(39, 39),
+ psa_type='collect',
+ shrink_factor=1,
+ compact=True)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 39, 39)
+
+ # test 'distribute' psa_type
+ inputs = [torch.randn(1, 32, 39, 39)]
+ head = PSAHead(
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ mask_size=(39, 39),
+ psa_type='distribute')
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 39, 39)
+
+
+def test_gc_head():
+    """Check GCHead's conv count, ``gc_block`` attribute and output shape."""
+    gc_head = GCHead(in_channels=32, channels=16, num_classes=19)
+    assert len(gc_head.convs) == 2
+    assert hasattr(gc_head, 'gc_block')
+    feats = [torch.randn(1, 32, 45, 45)]
+    if torch.cuda.is_available():
+        gc_head, feats = to_cuda(gc_head, feats)
+    seg_logits = gc_head(feats)
+    expected_shape = (1, gc_head.num_classes, 45, 45)
+    assert seg_logits.shape == expected_shape
+
+
+def test_nl_head():
+    """Check NLHead's conv count, ``nl_block`` attribute and output shape."""
+    nl_head = NLHead(in_channels=32, channels=16, num_classes=19)
+    assert len(nl_head.convs) == 2
+    assert hasattr(nl_head, 'nl_block')
+    feats = [torch.randn(1, 32, 45, 45)]
+    if torch.cuda.is_available():
+        nl_head, feats = to_cuda(nl_head, feats)
+    seg_logits = nl_head(feats)
+    expected_shape = (1, nl_head.num_classes, 45, 45)
+    assert seg_logits.shape == expected_shape
+
+
+def test_cc_head():
+    """Check CCHead's conv count and ``cca`` attribute; forward needs CUDA."""
+    cc_head = CCHead(in_channels=32, channels=16, num_classes=19)
+    assert len(cc_head.convs) == 2
+    assert hasattr(cc_head, 'cca')
+    # the structural checks above run everywhere; the forward pass is skipped
+    # when CUDA is unavailable
+    cuda_ready = torch.cuda.is_available()
+    if not cuda_ready:
+        pytest.skip('CCHead requires CUDA')
+    feats = [torch.randn(1, 32, 45, 45)]
+    cc_head, feats = to_cuda(cc_head, feats)
+    seg_logits = cc_head(feats)
+    expected_shape = (1, cc_head.num_classes, 45, 45)
+    assert seg_logits.shape == expected_shape
+
+
+def test_uper_head():
+ """Check UPerHead validation, norm config and output shape."""
+
+ with pytest.raises(AssertionError):
+ # in_channels must be list|tuple
+ UPerHead(in_channels=32, channels=16, num_classes=19)
+
+ # test no norm_cfg
+ head = UPerHead(
+ in_channels=[32, 16], channels=16, num_classes=19, in_index=[-2, -1])
+ assert not _conv_has_norm(head, sync_bn=False)
+
+ # test with norm_cfg
+ head = UPerHead(
+ in_channels=[32, 16],
+ channels=16,
+ num_classes=19,
+ norm_cfg=dict(type='SyncBN'),
+ in_index=[-2, -1])
+ assert _conv_has_norm(head, sync_bn=True)
+
+ # two inputs of different sizes; the asserted output is 45x45
+ inputs = [torch.randn(1, 32, 45, 45), torch.randn(1, 16, 21, 21)]
+ head = UPerHead(
+ in_channels=[32, 16], channels=16, num_classes=19, in_index=[-2, -1])
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+
+def test_ann_head():
+    """Check ANNHead's output shape for a two-level input."""
+    feats = [torch.randn(1, 16, 45, 45), torch.randn(1, 32, 21, 21)]
+    head_kwargs = dict(
+        in_channels=[16, 32],
+        channels=16,
+        num_classes=19,
+        in_index=[-2, -1],
+        project_channels=8)
+    ann_head = ANNHead(**head_kwargs)
+    if torch.cuda.is_available():
+        ann_head, feats = to_cuda(ann_head, feats)
+    seg_logits = ann_head(feats)
+    # the asserted output is 21x21, the size of the second input
+    expected_shape = (1, ann_head.num_classes, 21, 21)
+    assert seg_logits.shape == expected_shape
+
+
+def test_da_head():
+    """Check DAHead's three forward outputs and ``forward_test`` output."""
+    feats = [torch.randn(1, 32, 45, 45)]
+    da_head = DAHead(
+        in_channels=32, channels=16, num_classes=19, pam_channels=8)
+    if torch.cuda.is_available():
+        da_head, feats = to_cuda(da_head, feats)
+    expected_shape = (1, da_head.num_classes, 45, 45)
+
+    # forward must return a 3-tuple whose items all share one shape
+    train_outputs = da_head(feats)
+    assert isinstance(train_outputs, tuple) and len(train_outputs) == 3
+    for single_output in train_outputs:
+        assert single_output.shape == expected_shape
+
+    # forward_test must return a single tensor of that shape
+    test_output = da_head.forward_test(feats, None, None)
+    assert test_output.shape == expected_shape
+
+
+def test_ocr_head():
+    """Check OCRHead's output shape when fed an FCNHead's prediction."""
+
+    inputs = [torch.randn(1, 32, 45, 45)]
+    ocr_head = OCRHead(
+        in_channels=32, channels=16, num_classes=19, ocr_channels=8)
+    fcn_head = FCNHead(in_channels=32, channels=16, num_classes=19)
+    if torch.cuda.is_available():
+        # bind each moved head back to its own name rather than to a
+        # throwaway variable, so the test does not depend on ``.cuda()``
+        # modifying the module in place
+        ocr_head, inputs = to_cuda(ocr_head, inputs)
+        fcn_head, inputs = to_cuda(fcn_head, inputs)
+    # the FCN head's output is passed to the OCR head as its second argument
+    prev_output = fcn_head(inputs)
+    output = ocr_head(inputs, prev_output)
+    assert output.shape == (1, ocr_head.num_classes, 45, 45)
+
+
+def test_enc_head():
+ """Check EncHead outputs with and without se_loss and lateral inputs."""
+ # with se_loss, w.o. lateral
+ # forward returns a 2-tuple: a (1, C, 21, 21) map and a (1, C) tensor
+ inputs = [torch.randn(1, 32, 21, 21)]
+ head = EncHead(
+ in_channels=[32], channels=16, num_classes=19, in_index=[-1])
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert isinstance(outputs, tuple) and len(outputs) == 2
+ assert outputs[0].shape == (1, head.num_classes, 21, 21)
+ assert outputs[1].shape == (1, head.num_classes)
+
+ # w.o se_loss, w.o. lateral
+ # with use_se_loss=False the head returns a single tensor
+ inputs = [torch.randn(1, 32, 21, 21)]
+ head = EncHead(
+ in_channels=[32],
+ channels=16,
+ use_se_loss=False,
+ num_classes=19,
+ in_index=[-1])
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 21, 21)
+
+ # with se_loss, with lateral
+ inputs = [torch.randn(1, 16, 45, 45), torch.randn(1, 32, 21, 21)]
+ head = EncHead(
+ in_channels=[16, 32],
+ channels=16,
+ add_lateral=True,
+ num_classes=19,
+ in_index=[-2, -1])
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ outputs = head(inputs)
+ assert isinstance(outputs, tuple) and len(outputs) == 2
+ assert outputs[0].shape == (1, head.num_classes, 21, 21)
+ assert outputs[1].shape == (1, head.num_classes)
+ # forward_test returns a single (1, C, 21, 21) tensor
+ test_output = head.forward_test(inputs, None, None)
+ assert test_output.shape == (1, head.num_classes, 21, 21)
+
+
+def test_dw_aspp_head():
+ """Check DepthwiseSeparableASPPHead with and without the c1 branch."""
+
+ # test w.o. c1
+ # zero c1 channels must leave c1_bottleneck unset
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = DepthwiseSeparableASPPHead(
+ c1_in_channels=0,
+ c1_channels=0,
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ dilations=(1, 12, 24))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert head.c1_bottleneck is None
+ # module 0 exposes ``conv``; modules 1 and 2 expose ``depthwise_conv``
+ assert head.aspp_modules[0].conv.dilation == (1, 1)
+ assert head.aspp_modules[1].depthwise_conv.dilation == (12, 12)
+ assert head.aspp_modules[2].depthwise_conv.dilation == (24, 24)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
+
+ # test with c1
+ # the asserted output is 45x45, the size of the first (8-channel) input
+ inputs = [torch.randn(1, 8, 45, 45), torch.randn(1, 32, 21, 21)]
+ head = DepthwiseSeparableASPPHead(
+ c1_in_channels=8,
+ c1_channels=4,
+ in_channels=32,
+ channels=16,
+ num_classes=19,
+ dilations=(1, 12, 24))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert head.c1_bottleneck.in_channels == 8
+ assert head.c1_bottleneck.out_channels == 4
+ assert head.aspp_modules[0].conv.dilation == (1, 1)
+ assert head.aspp_modules[1].depthwise_conv.dilation == (12, 12)
+ assert head.aspp_modules[2].depthwise_conv.dilation == (24, 24)
+ outputs = head(inputs)
+ assert outputs.shape == (1, head.num_classes, 45, 45)
diff --git a/tests/test_models/test_losses.py b/tests/test_models/test_losses.py
new file mode 100644
index 0000000000..edae6bfd16
--- /dev/null
+++ b/tests/test_models/test_losses.py
@@ -0,0 +1,134 @@
+import numpy as np
+import pytest
+import torch
+
+from mmseg.models.losses import Accuracy, reduce_loss, weight_reduce_loss
+
+
+def test_utils():
+ """Check ``reduce_loss`` and ``weight_reduce_loss`` for each reduction."""
+ loss = torch.rand(1, 3, 4, 4)
+ # weight is 1 on the top-left 2x2 patch of every channel, 0 elsewhere
+ weight = torch.zeros(1, 3, 4, 4)
+ weight[:, :, :2, :2] = 1
+
+ # test reduce_loss()
+ # 'none' must hand back the very same tensor object
+ reduced = reduce_loss(loss, 'none')
+ assert reduced is loss
+
+ reduced = reduce_loss(loss, 'mean')
+ np.testing.assert_almost_equal(reduced.numpy(), loss.mean())
+
+ reduced = reduce_loss(loss, 'sum')
+ np.testing.assert_almost_equal(reduced.numpy(), loss.sum())
+
+ # test weight_reduce_loss()
+ reduced = weight_reduce_loss(loss, weight=None, reduction='none')
+ assert reduced is loss
+
+ # the expected mean is taken over all elements, zero-weighted ones included
+ reduced = weight_reduce_loss(loss, weight=weight, reduction='mean')
+ target = (loss * weight).mean()
+ np.testing.assert_almost_equal(reduced.numpy(), target)
+
+ reduced = weight_reduce_loss(loss, weight=weight, reduction='sum')
+ np.testing.assert_almost_equal(reduced.numpy(), (loss * weight).sum())
+
+ # a weight with fewer dimensions than the loss must be rejected
+ with pytest.raises(AssertionError):
+ weight_wrong = weight[0, 0, ...]
+ weight_reduce_loss(loss, weight=weight_wrong, reduction='mean')
+
+ # a weight with 2 channels against the loss's 3 must be rejected
+ with pytest.raises(AssertionError):
+ weight_wrong = weight[:, 0:2, ...]
+ weight_reduce_loss(loss, weight=weight_wrong, reduction='mean')
+
+
+def test_ce_loss():
+ """Check CrossEntropyLoss config validation and a few known loss values."""
+ from mmseg.models import build_loss
+
+ # use_mask and use_sigmoid cannot be true at the same time
+ with pytest.raises(AssertionError):
+ loss_cfg = dict(
+ type='CrossEntropyLoss',
+ use_mask=True,
+ use_sigmoid=True,
+ loss_weight=1.0)
+ build_loss(loss_cfg)
+
+ # test loss with class weights
+ # the unweighted loss asserted below is 200; with the class-1 weight of
+ # 0.2 the expected value is 40
+ loss_cls_cfg = dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ class_weight=[0.8, 0.2],
+ loss_weight=1.0)
+ loss_cls = build_loss(loss_cls_cfg)
+ fake_pred = torch.Tensor([[100, -100]])
+ fake_label = torch.Tensor([1]).long()
+ assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(40.))
+
+ # same prediction and label without class weights
+ loss_cls_cfg = dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)
+ loss_cls = build_loss(loss_cls_cfg)
+ assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(200.))
+
+ # sigmoid variant: the expected loss for this input is 0
+ loss_cls_cfg = dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)
+ loss_cls = build_loss(loss_cls_cfg)
+ assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(0.))
+
+ # TODO test use_mask
+
+
+def test_accuracy():
+ """Check Accuracy for empty input, top-k, thresholds and bad arguments."""
+ # test for empty pred
+ pred = torch.empty(0, 4)
+ label = torch.empty(0)
+ accuracy = Accuracy(topk=1)
+ acc = accuracy(pred, label)
+ assert acc.item() == 0
+
+ # five samples over four classes, shared by the cases below
+ pred = torch.Tensor([[0.2, 0.3, 0.6, 0.5], [0.1, 0.1, 0.2, 0.6],
+ [0.9, 0.0, 0.0, 0.1], [0.4, 0.7, 0.1, 0.1],
+ [0.0, 0.0, 0.99, 0]])
+ # test for top1
+ true_label = torch.Tensor([2, 3, 0, 1, 2]).long()
+ accuracy = Accuracy(topk=1)
+ acc = accuracy(pred, true_label)
+ assert acc.item() == 100
+
+ # test for top1 with score thresh=0.8
+ # only two of the five rows have a top score above 0.8 (0.9 and 0.99)
+ true_label = torch.Tensor([2, 3, 0, 1, 2]).long()
+ accuracy = Accuracy(topk=1, thresh=0.8)
+ acc = accuracy(pred, true_label)
+ assert acc.item() == 40
+
+ # test for top2
+ accuracy = Accuracy(topk=2)
+ label = torch.Tensor([3, 2, 0, 0, 2]).long()
+ acc = accuracy(pred, label)
+ assert acc.item() == 100
+
+ # test for both top1 and top2
+ # a tuple topk yields one accuracy value per k
+ accuracy = Accuracy(topk=(1, 2))
+ true_label = torch.Tensor([2, 3, 0, 1, 2]).long()
+ acc = accuracy(pred, true_label)
+ for a in acc:
+ assert a.item() == 100
+
+ # topk is larger than pred class number
+ with pytest.raises(AssertionError):
+ accuracy = Accuracy(topk=5)
+ accuracy(pred, true_label)
+
+ # wrong topk type
+ with pytest.raises(AssertionError):
+ accuracy = Accuracy(topk='wrong type')
+ accuracy(pred, true_label)
+
+ # label size is larger than required
+ with pytest.raises(AssertionError):
+ label = torch.Tensor([2, 3, 0, 1, 2, 0]).long() # size mismatch
+ accuracy = Accuracy()
+ accuracy(pred, label)
+
+ # wrong pred dimension
+ with pytest.raises(AssertionError):
+ accuracy = Accuracy()
+ accuracy(pred[:, :, None], true_label)
diff --git a/tests/test_models/test_segmentor.py b/tests/test_models/test_segmentor.py
new file mode 100644
index 0000000000..67f7884bc8
--- /dev/null
+++ b/tests/test_models/test_segmentor.py
@@ -0,0 +1,212 @@
+import mmcv
+import numpy as np
+import torch
+from torch import nn
+
+from mmseg.models import BACKBONES, HEADS, build_segmentor
+from mmseg.models.decode_heads.cascade_decode_head import BaseCascadeDecodeHead
+from mmseg.models.decode_heads.decode_head import BaseDecodeHead
+
+
+def _demo_mm_inputs(input_shape=(1, 3, 8, 16), num_classes=10):
+    """Create a superset of inputs needed to run test or train batches.
+
+    Args:
+        input_shape (tuple): Input batch dimensions, unpacked as
+            ``(N, C, H, W)``.
+        num_classes (int): Number of semantic classes. Labels are drawn
+            from ``[0, num_classes)``.
+
+    Returns:
+        dict: ``imgs`` (float tensor shaped like ``input_shape``),
+        ``img_metas`` (list of ``N`` meta dicts) and ``gt_semantic_seg``
+        (long tensor of shape ``(N, 1, H, W)``).
+    """
+    (N, C, H, W) = input_shape
+
+    # A fixed seed keeps the generated batch reproducible.
+    rng = np.random.RandomState(0)
+
+    imgs = rng.rand(*input_shape)
+    # ``high`` is exclusive in ``RandomState.randint``; passing
+    # ``num_classes`` lets the last class id appear and also makes
+    # ``num_classes=1`` valid. Labels pass through uint8, so values above
+    # 255 would wrap.
+    segs = rng.randint(
+        low=0, high=num_classes, size=(N, 1, H, W)).astype(np.uint8)
+
+    img_metas = [{
+        'img_shape': (H, W, C),
+        'ori_shape': (H, W, C),
+        'pad_shape': (H, W, C),
+        'filename': '.png',
+        'scale_factor': 1.0,
+        'flip': False,
+        'flip_direction': 'horizontal'
+    } for _ in range(N)]
+
+    mm_inputs = {
+        'imgs': torch.FloatTensor(imgs),
+        'img_metas': img_metas,
+        'gt_semantic_seg': torch.LongTensor(segs)
+    }
+    return mm_inputs
+
+
+@BACKBONES.register_module()
+class ExampleBackbone(nn.Module):
+    """Minimal registered backbone: one 3x3 convolution from 3 to 3 channels."""
+
+    def __init__(self):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3)
+
+    def init_weights(self, pretrained=None):
+        """Do nothing; ``pretrained`` is ignored."""
+        return None
+
+    def forward(self, x):
+        """Return the convolved input wrapped in a one-element list."""
+        feat = self.conv(x)
+        return [feat]
+
+
+@HEADS.register_module()
+class ExampleDecodeHead(BaseDecodeHead):
+    """Minimal registered decode head that classifies the first input."""
+
+    def __init__(self):
+        super().__init__(3, 3, num_classes=19)
+
+    def forward(self, inputs):
+        """Apply ``cls_seg`` to the first element of ``inputs``."""
+        first_feat = inputs[0]
+        return self.cls_seg(first_feat)
+
+
+@HEADS.register_module()
+class ExampleCascadeDecodeHead(BaseCascadeDecodeHead):
+    """Minimal registered cascade head that classifies the first input."""
+
+    def __init__(self):
+        super().__init__(3, 3, num_classes=19)
+
+    def forward(self, inputs, prev_out):
+        """Apply ``cls_seg`` to ``inputs[0]``; ``prev_out`` is not used."""
+        first_feat = inputs[0]
+        return self.cls_seg(first_feat)
+
+
+def _segmentor_forward_train_test(segmentor):
+ """Run ``segmentor`` through a train forward and two test forwards.
+
+ Args:
+ segmentor: Built segmentor exposing ``decode_head`` and ``forward``.
+ """
+ # with several decode heads, the last one supplies the class count
+ if isinstance(segmentor.decode_head, nn.ModuleList):
+ num_classes = segmentor.decode_head[-1].num_classes
+ else:
+ num_classes = segmentor.decode_head.num_classes
+ # the default input_shape of _demo_mm_inputs is used, i.e. batch size 1
+ mm_inputs = _demo_mm_inputs(num_classes=num_classes)
+
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+ gt_semantic_seg = mm_inputs['gt_semantic_seg']
+
+ # convert to cuda Tensor if applicable
+ if torch.cuda.is_available():
+ segmentor = segmentor.cuda()
+ imgs = imgs.cuda()
+ gt_semantic_seg = gt_semantic_seg.cuda()
+
+ # Test forward train
+ losses = segmentor.forward(
+ imgs, img_metas, gt_semantic_seg=gt_semantic_seg, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward simple test
+ with torch.no_grad():
+ segmentor.eval()
+ # pack into lists
+ img_list = [img[None, :] for img in imgs]
+ img_meta_list = [[img_meta] for img_meta in img_metas]
+ segmentor.forward(img_list, img_meta_list, return_loss=False)
+
+ # Test forward aug test
+ # both lists are doubled, so every image and its meta appear twice
+ with torch.no_grad():
+ segmentor.eval()
+ # pack into lists
+ img_list = [img[None, :] for img in imgs]
+ img_list = img_list + img_list
+ img_meta_list = [[img_meta] for img_meta in img_metas]
+ img_meta_list = img_meta_list + img_meta_list
+ segmentor.forward(img_list, img_meta_list, return_loss=False)
+
+
+def test_encoder_decoder():
+    """Build EncoderDecoder variants and run each through the forward helper."""
+
+    def _build_and_check(model_cfg, test_settings):
+        segmentor = build_segmentor(
+            model_cfg, train_cfg=None, test_cfg=mmcv.Config(test_settings))
+        _segmentor_forward_train_test(segmentor)
+
+    # test 1 decode head, w.o. aux head
+    plain_cfg = dict(
+        type='EncoderDecoder',
+        backbone=dict(type='ExampleBackbone'),
+        decode_head=dict(type='ExampleDecodeHead'))
+    _build_and_check(plain_cfg, dict(mode='whole'))
+
+    # test slide mode (reuses the model config above)
+    _build_and_check(
+        plain_cfg, dict(mode='slide', crop_size=(3, 3), stride=(2, 2)))
+
+    # test 1 decode head, 1 aux head
+    one_aux_cfg = dict(
+        type='EncoderDecoder',
+        backbone=dict(type='ExampleBackbone'),
+        decode_head=dict(type='ExampleDecodeHead'),
+        auxiliary_head=dict(type='ExampleDecodeHead'))
+    _build_and_check(one_aux_cfg, dict(mode='whole'))
+
+    # test 1 decode head, 2 aux head
+    two_aux_cfg = dict(
+        type='EncoderDecoder',
+        backbone=dict(type='ExampleBackbone'),
+        decode_head=dict(type='ExampleDecodeHead'),
+        auxiliary_head=[
+            dict(type='ExampleDecodeHead'),
+            dict(type='ExampleDecodeHead')
+        ])
+    _build_and_check(two_aux_cfg, dict(mode='whole'))
+
+
+def test_cascade_encoder_decoder():
+    """Build CascadeEncoderDecoder variants and run each through the helper."""
+
+    def _build_and_check(model_cfg, test_settings):
+        segmentor = build_segmentor(
+            model_cfg, train_cfg=None, test_cfg=mmcv.Config(test_settings))
+        _segmentor_forward_train_test(segmentor)
+
+    def _two_stage_heads():
+        # a fresh list per config: a plain head followed by a cascade head
+        return [
+            dict(type='ExampleDecodeHead'),
+            dict(type='ExampleCascadeDecodeHead')
+        ]
+
+    # test 2-stage decode head, w.o. aux head
+    plain_cfg = dict(
+        type='CascadeEncoderDecoder',
+        num_stages=2,
+        backbone=dict(type='ExampleBackbone'),
+        decode_head=_two_stage_heads())
+    _build_and_check(plain_cfg, dict(mode='whole'))
+
+    # test slide mode (reuses the model config above)
+    _build_and_check(
+        plain_cfg, dict(mode='slide', crop_size=(3, 3), stride=(2, 2)))
+
+    # test 2-stage decode head, 1 aux head
+    one_aux_cfg = dict(
+        type='CascadeEncoderDecoder',
+        num_stages=2,
+        backbone=dict(type='ExampleBackbone'),
+        decode_head=_two_stage_heads(),
+        auxiliary_head=dict(type='ExampleDecodeHead'))
+    _build_and_check(one_aux_cfg, dict(mode='whole'))
+
+    # test 2-stage decode head, 2 aux head
+    two_aux_cfg = dict(
+        type='CascadeEncoderDecoder',
+        num_stages=2,
+        backbone=dict(type='ExampleBackbone'),
+        decode_head=_two_stage_heads(),
+        auxiliary_head=[
+            dict(type='ExampleDecodeHead'),
+            dict(type='ExampleDecodeHead')
+        ])
+    _build_and_check(two_aux_cfg, dict(mode='whole'))
diff --git a/tests/test_ops/test_sep_conv_module.py b/tests/test_ops/test_sep_conv_module.py
new file mode 100644
index 0000000000..4eb650111c
--- /dev/null
+++ b/tests/test_ops/test_sep_conv_module.py
@@ -0,0 +1,71 @@
+import pytest
+import torch
+import torch.nn as nn
+
+from mmseg.ops import DepthwiseSeparableConvModule
+
+
+def test_depthwise_separable_conv():
+ with pytest.raises(AssertionError):
+ # conv_cfg must be a dict or None
+ DepthwiseSeparableConvModule(4, 8, 2, groups=2)
+
+ # test default config
+ conv = DepthwiseSeparableConvModule(3, 8, 2)
+ assert conv.depthwise_conv.conv.groups == 3
+ assert conv.pointwise_conv.conv.kernel_size == (1, 1)
+ assert not conv.depthwise_conv.with_norm
+ assert not conv.pointwise_conv.with_norm
+ assert conv.depthwise_conv.activate.__class__.__name__ == 'ReLU'
+ assert conv.pointwise_conv.activate.__class__.__name__ == 'ReLU'
+ x = torch.rand(1, 3, 256, 256)
+ output = conv(x)
+ assert output.shape == (1, 8, 255, 255)
+
+ # test
+ conv = DepthwiseSeparableConvModule(3, 8, 2, dw_norm_cfg=dict(type='BN'))
+ assert conv.depthwise_conv.norm_name == 'bn'
+ assert not conv.pointwise_conv.with_norm
+ x = torch.rand(1, 3, 256, 256)
+ output = conv(x)
+ assert output.shape == (1, 8, 255, 255)
+
+ conv = DepthwiseSeparableConvModule(3, 8, 2, pw_norm_cfg=dict(type='BN'))
+ assert not conv.depthwise_conv.with_norm
+ assert conv.pointwise_conv.norm_name == 'bn'
+ x = torch.rand(1, 3, 256, 256)
+ output = conv(x)
+ assert output.shape == (1, 8, 255, 255)
+
+ # add test for ['norm', 'conv', 'act']
+ conv = DepthwiseSeparableConvModule(3, 8, 2, order=('norm', 'conv', 'act'))
+ x = torch.rand(1, 3, 256, 256)
+ output = conv(x)
+ assert output.shape == (1, 8, 255, 255)
+
+ conv = DepthwiseSeparableConvModule(
+ 3, 8, 3, padding=1, with_spectral_norm=True)
+ assert hasattr(conv.depthwise_conv.conv, 'weight_orig')
+ assert hasattr(conv.pointwise_conv.conv, 'weight_orig')
+ output = conv(x)
+ assert output.shape == (1, 8, 256, 256)
+
+ conv = DepthwiseSeparableConvModule(
+ 3, 8, 3, padding=1, padding_mode='reflect')
+ assert isinstance(conv.depthwise_conv.padding_layer, nn.ReflectionPad2d)
+ output = conv(x)
+ assert output.shape == (1, 8, 256, 256)
+
+ conv = DepthwiseSeparableConvModule(
+ 3, 8, 3, padding=1, dw_act_cfg=dict(type='LeakyReLU'))
+ assert conv.depthwise_conv.activate.__class__.__name__ == 'LeakyReLU'
+ assert conv.pointwise_conv.activate.__class__.__name__ == 'ReLU'
+ output = conv(x)
+ assert output.shape == (1, 8, 256, 256)
+
+ conv = DepthwiseSeparableConvModule(
+ 3, 8, 3, padding=1, pw_act_cfg=dict(type='LeakyReLU'))
+ assert conv.depthwise_conv.activate.__class__.__name__ == 'ReLU'
+ assert conv.pointwise_conv.activate.__class__.__name__ == 'LeakyReLU'
+ output = conv(x)
+ assert output.shape == (1, 8, 256, 256)
diff --git a/tests/test_sampler.py b/tests/test_sampler.py
new file mode 100644
index 0000000000..af26b8dd62
--- /dev/null
+++ b/tests/test_sampler.py
@@ -0,0 +1,21 @@
+import pytest
+import torch
+
+from mmseg.core import OHEMPixelSampler
+
+
+def test_ohem_sampler():
+
+ with pytest.raises(AssertionError):
+ # seg_logit and seg_label must be of the same size
+ sampler = OHEMPixelSampler()
+ seg_logit = torch.randn(1, 19, 45, 45)
+ seg_label = torch.randint(0, 19, size=(1, 1, 89, 89))
+ sampler.sample(seg_logit, seg_label)
+
+ sampler = OHEMPixelSampler()
+ seg_logit = torch.randn(1, 19, 45, 45)
+ seg_label = torch.randint(0, 19, size=(1, 1, 45, 45))
+ seg_weight = sampler.sample(seg_logit, seg_label)
+ assert seg_weight.shape[0] == seg_logit.shape[0]
+ assert seg_weight.shape[1:] == seg_logit.shape[2:]
diff --git a/tools/benchmark.py b/tools/benchmark.py
new file mode 100644
index 0000000000..bcb0d9580f
--- /dev/null
+++ b/tools/benchmark.py
@@ -0,0 +1,81 @@
+import argparse
+import time
+
+import torch
+from mmcv import Config
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import load_checkpoint
+
+from mmseg.datasets import build_dataloader, build_dataset
+from mmseg.models import build_segmentor
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='MMSeg benchmark a model')
+ parser.add_argument('config', help='test config file path')
+ parser.add_argument('checkpoint', help='checkpoint file')
+ parser.add_argument(
+ '--log-interval', type=int, default=50, help='interval of logging')
+ args = parser.parse_args()
+ return args
+
+
+def main():
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+ # set cudnn_benchmark
+ torch.backends.cudnn.benchmark = False
+ cfg.model.pretrained = None
+ cfg.data.test.test_mode = True
+
+ # build the dataloader
+ # TODO: support multiple images per gpu (only minor changes are needed)
+ dataset = build_dataset(cfg.data.test)
+ data_loader = build_dataloader(
+ dataset,
+ samples_per_gpu=1,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=False,
+ shuffle=False)
+
+ # build the model and load checkpoint
+ model = build_segmentor(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
+ load_checkpoint(model, args.checkpoint, map_location='cpu')
+
+ model = MMDataParallel(model, device_ids=[0])
+
+ model.eval()
+
+ # the first several iterations may be very slow so skip them
+ num_warmup = 5
+ pure_inf_time = 0
+ total_iters = 200
+
+ # benchmark with 200 image and take the average
+ for i, data in enumerate(data_loader):
+
+ torch.cuda.synchronize()
+ start_time = time.perf_counter()
+
+ with torch.no_grad():
+ model(return_loss=False, rescale=True, **data)
+
+ torch.cuda.synchronize()
+ elapsed = time.perf_counter() - start_time
+
+ if i >= num_warmup:
+ pure_inf_time += elapsed
+ if (i + 1) % args.log_interval == 0:
+ fps = (i + 1 - num_warmup) / pure_inf_time
+ print(f'Done image [{i + 1:<3}/ {total_iters}], '
+ f'fps: {fps:.2f} img / s')
+
+ if (i + 1) == total_iters:
+ fps = (i + 1 - num_warmup) / pure_inf_time
+ print(f'Overall fps: {fps:.2f} img / s')
+ break
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/convert_datasets/cityscapes.py b/tools/convert_datasets/cityscapes.py
new file mode 100644
index 0000000000..99d05b41f5
--- /dev/null
+++ b/tools/convert_datasets/cityscapes.py
@@ -0,0 +1,55 @@
+import argparse
+import os.path as osp
+
+import mmcv
+from cityscapesscripts.preparation.json2labelImg import json2labelImg
+
+
+def convert_json_to_label(json_file):
+ label_file = json_file.replace('_polygons.json', '_labelTrainIds.png')
+ json2labelImg(json_file, label_file, 'trainIds')
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Convert Cityscapes annotations to TrainIds')
+ parser.add_argument('cityscapes_path', help='cityscapes data path')
+ parser.add_argument('--gt-dir', default='gtFine', type=str)
+ parser.add_argument('-o', '--out-dir', help='output path')
+ parser.add_argument(
+ '--nproc', default=1, type=int, help='number of process')
+ args = parser.parse_args()
+ return args
+
+
+def main():
+ args = parse_args()
+ cityscapes_path = args.cityscapes_path
+ out_dir = args.out_dir if args.out_dir else cityscapes_path
+ mmcv.mkdir_or_exist(out_dir)
+
+ gt_dir = osp.join(cityscapes_path, args.gt_dir)
+
+ poly_files = []
+ for poly in mmcv.scandir(gt_dir, '_polygons.json', recursive=True):
+ poly_file = osp.join(gt_dir, poly)
+ poly_files.append(poly_file)
+ if args.nproc > 1:
+ mmcv.track_parallel_progress(convert_json_to_label, poly_files,
+ args.nproc)
+ else:
+ mmcv.track_progress(convert_json_to_label, poly_files)
+
+ split_names = ['train', 'val', 'test']
+
+ for split in split_names:
+ filenames = []
+ for poly in mmcv.scandir(
+ osp.join(gt_dir, split), '_polygons.json', recursive=True):
+ filenames.append(poly.replace('_gtFine_polygons.json', ''))
+ with open(osp.join(out_dir, f'{split}.txt'), 'w') as f:
+ f.writelines(f + '\n' for f in filenames)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/convert_datasets/voc_aug.py b/tools/convert_datasets/voc_aug.py
new file mode 100644
index 0000000000..fd5400361f
--- /dev/null
+++ b/tools/convert_datasets/voc_aug.py
@@ -0,0 +1,87 @@
+import argparse
+import os.path as osp
+from functools import partial
+
+import mmcv
+import numpy as np
+from PIL import Image
+from scipy.io import loadmat
+
+AUG_LEN = 10582
+
+
+def convert_mat(mat_file, in_dir, out_dir):
+ data = loadmat(osp.join(in_dir, mat_file))
+ mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8)
+ seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png'))
+ Image.fromarray(mask).save(seg_filename, 'PNG')
+
+
+def generate_aug_list(merged_list, excluded_list):
+ return list(set(merged_list) - set(excluded_list))
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Convert PASCAL VOC annotations to mmsegmentation format')
+ parser.add_argument('devkit_path', help='pascal voc devkit path')
+ parser.add_argument('aug_path', help='pascal voc aug path')
+ parser.add_argument('-o', '--out_dir', help='output path')
+ parser.add_argument(
+ '--nproc', default=1, type=int, help='number of process')
+ args = parser.parse_args()
+ return args
+
+
+def main():
+ args = parse_args()
+ devkit_path = args.devkit_path
+ aug_path = args.aug_path
+ nproc = args.nproc
+ if args.out_dir is None:
+ out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug')
+ else:
+ out_dir = args.out_dir
+ mmcv.mkdir_or_exist(out_dir)
+ in_dir = osp.join(aug_path, 'dataset', 'cls')
+
+ mmcv.track_parallel_progress(
+ partial(convert_mat, in_dir=in_dir, out_dir=out_dir),
+ list(mmcv.scandir(in_dir, suffix='.mat')),
+ nproc=nproc)
+
+ with open(osp.join(aug_path, 'dataset', 'trainval.txt')) as f:
+ full_aug_list = [line.strip() for line in f]
+ with open(
+ osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
+ 'train.txt')) as f:
+ ori_train_list = [line.strip() for line in f]
+ with open(
+ osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
+ 'val.txt')) as f:
+ val_list = [line.strip() for line in f]
+
+ aug_train_list = generate_aug_list(ori_train_list + full_aug_list,
+ val_list)
+ assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format(
+ AUG_LEN)
+
+ with open(
+ osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
+ 'trainaug.txt'), 'w') as f:
+ f.writelines(line + '\n' for line in aug_train_list)
+
+ aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list)
+ assert len(aug_list) == AUG_LEN - len(
+ ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN -
+ len(ori_train_list))
+ with open(
+ osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'),
+ 'w') as f:
+ f.writelines(line + '\n' for line in aug_list)
+
+ print('Done!')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/dist_test.sh b/tools/dist_test.sh
new file mode 100755
index 0000000000..7381dfb1d7
--- /dev/null
+++ b/tools/dist_test.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Usage: [PORT=29500] dist_test.sh CONFIG CHECKPOINT GPUS [test.py args...]
+CONFIG=$1      # config file handed to test.py
+CHECKPOINT=$2  # checkpoint file handed to test.py
+GPUS=$3        # value for --nproc_per_node
+PORT=${PORT:-29500}  # --master_port; taken from the environment when set
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
+    $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
diff --git a/tools/dist_train.sh b/tools/dist_train.sh
new file mode 100755
index 0000000000..5b43fffbf2
--- /dev/null
+++ b/tools/dist_train.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# Usage: [PORT=29500] dist_train.sh CONFIG GPUS [train.py args...]
+CONFIG=$1  # config file handed to train.py
+GPUS=$2  # value for --nproc_per_node
+PORT=${PORT:-29500}  # --master_port; taken from the environment when set
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
+ $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
diff --git a/tools/get_flops.py b/tools/get_flops.py
new file mode 100644
index 0000000000..86f1c5a9ef
--- /dev/null
+++ b/tools/get_flops.py
@@ -0,0 +1,55 @@
+import argparse
+
+from mmcv import Config
+from mmcv.cnn import get_model_complexity_info
+
+from mmseg.models import build_segmentor
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Train a segmentor')
+ parser.add_argument('config', help='train config file path')
+ parser.add_argument(
+ '--shape',
+ type=int,
+ nargs='+',
+ default=[2048, 1024],
+ help='input image size')
+ args = parser.parse_args()
+ return args
+
+
+def main():
+
+ args = parse_args()
+
+ if len(args.shape) == 1:
+ input_shape = (3, args.shape[0], args.shape[0])
+ elif len(args.shape) == 2:
+ input_shape = (3, ) + tuple(args.shape)
+ else:
+ raise ValueError('invalid input shape')
+
+ cfg = Config.fromfile(args.config)
+ model = build_segmentor(
+ cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg).cuda()
+ model.eval()
+
+ if hasattr(model, 'forward_dummy'):
+ model.forward = model.forward_dummy
+ else:
+ raise NotImplementedError(
+ 'FLOPs counter is currently not currently supported with {}'.
+ format(model.__class__.__name__))
+
+ flops, params = get_model_complexity_info(model, input_shape)
+ split_line = '=' * 30
+ print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
+ split_line, input_shape, flops, params))
+ print('!!!Please be cautious if you use the results in papers. '
+ 'You may need to check if all ops are supported and verify that the '
+ 'flops computation is correct.')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/print_config.py b/tools/print_config.py
new file mode 100644
index 0000000000..2a0c67780a
--- /dev/null
+++ b/tools/print_config.py
@@ -0,0 +1,28 @@
+import argparse
+
+from mmcv import Config, DictAction
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Print the whole config')
+ parser.add_argument('config', help='config file path')
+ parser.add_argument(
+ '--options', nargs='+', action=DictAction, help='arguments in dict')
+ args = parser.parse_args()
+
+ return args
+
+
+def main():
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+ if args.options is not None:
+ cfg.merge_from_dict(args.options)
+ print(f'Config:\n{cfg.pretty_text}')
+ # dump config
+ cfg.dump('example.py')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/publish_model.py b/tools/publish_model.py
new file mode 100644
index 0000000000..a049f17674
--- /dev/null
+++ b/tools/publish_model.py
@@ -0,0 +1,35 @@
+import argparse
+import subprocess
+
+import torch
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Process a checkpoint to be published')
+ parser.add_argument('in_file', help='input checkpoint filename')
+ parser.add_argument('out_file', help='output checkpoint filename')
+ args = parser.parse_args()
+ return args
+
+
+def process_checkpoint(in_file, out_file):
+ checkpoint = torch.load(in_file, map_location='cpu')
+ # remove optimizer for smaller file size
+ if 'optimizer' in checkpoint:
+ del checkpoint['optimizer']
+ # if it is necessary to remove some sensitive data in checkpoint['meta'],
+ # add the code here.
+ torch.save(checkpoint, out_file)
+ sha = subprocess.check_output(['sha256sum', out_file]).decode()
+ final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])
+ subprocess.Popen(['mv', out_file, final_file])
+
+
+def main():
+ args = parse_args()
+ process_checkpoint(args.in_file, args.out_file)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/slurm_test.sh b/tools/slurm_test.sh
new file mode 100755
index 0000000000..4e6f7bf4e3
--- /dev/null
+++ b/tools/slurm_test.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Usage: slurm_test.sh PARTITION JOB_NAME CONFIG CHECKPOINT [test.py args...]
+set -x
+# NOTE: tools/test.py below is a path relative to the current directory.
+PARTITION=$1  # srun -p
+JOB_NAME=$2  # srun --job-name
+CONFIG=$3  # config file handed to test.py
+CHECKPOINT=$4  # checkpoint file handed to test.py
+GPUS=${GPUS:-4}  # srun --ntasks; environment override, default 4
+GPUS_PER_NODE=${GPUS_PER_NODE:-4}  # srun --gres=gpu: and --ntasks-per-node
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}  # srun --cpus-per-task
+PY_ARGS=${@:5}  # remaining arguments, forwarded to test.py
+SRUN_ARGS=${SRUN_ARGS:-""}  # extra srun arguments from the environment
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+srun -p ${PARTITION} \
+ --job-name=${JOB_NAME} \
+ --gres=gpu:${GPUS_PER_NODE} \
+ --ntasks=${GPUS} \
+ --ntasks-per-node=${GPUS_PER_NODE} \
+ --cpus-per-task=${CPUS_PER_TASK} \
+ --kill-on-bad-exit=1 \
+ ${SRUN_ARGS} \
+ python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
diff --git a/tools/slurm_train.sh b/tools/slurm_train.sh
new file mode 100755
index 0000000000..ab232105f0
--- /dev/null
+++ b/tools/slurm_train.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Usage: slurm_train.sh PARTITION JOB_NAME CONFIG [train.py args...]
+set -x
+# NOTE: tools/train.py below is a path relative to the current directory.
+PARTITION=$1  # srun -p
+JOB_NAME=$2  # srun --job-name
+CONFIG=$3  # config file handed to train.py
+GPUS=${GPUS:-4}  # srun --ntasks; environment override, default 4
+GPUS_PER_NODE=${GPUS_PER_NODE:-4}  # srun --gres=gpu: and --ntasks-per-node
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}  # srun --cpus-per-task
+SRUN_ARGS=${SRUN_ARGS:-""}  # extra srun arguments from the environment
+PY_ARGS=${@:4}  # remaining arguments, forwarded to train.py
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+srun -p ${PARTITION} \
+ --job-name=${JOB_NAME} \
+ --gres=gpu:${GPUS_PER_NODE} \
+ --ntasks=${GPUS} \
+ --ntasks-per-node=${GPUS_PER_NODE} \
+ --cpus-per-task=${CPUS_PER_TASK} \
+ --kill-on-bad-exit=1 \
+ ${SRUN_ARGS} \
+ python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS}
diff --git a/tools/test.py b/tools/test.py
new file mode 100644
index 0000000000..3910f1f0bb
--- /dev/null
+++ b/tools/test.py
@@ -0,0 +1,142 @@
+import argparse
+import os
+
+import mmcv
+import torch
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import get_dist_info, init_dist, load_checkpoint
+from mmcv.utils import DictAction
+
+from mmseg.apis import multi_gpu_test, single_gpu_test
+from mmseg.datasets import build_dataloader, build_dataset
+from mmseg.models import build_segmentor
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='mmseg test (and eval) a model')
+ parser.add_argument('config', help='test config file path')
+ parser.add_argument('checkpoint', help='checkpoint file')
+ parser.add_argument(
+ '--aug-test', action='store_true', help='Use Flip and Multi scale aug')
+ parser.add_argument('--out', help='output result file in pickle format')
+ parser.add_argument(
+ '--format-only',
+ action='store_true',
+ help='Format the output results without perform evaluation. It is'
+ 'useful when you want to format the result to a specific format and '
+ 'submit it to the test server')
+ parser.add_argument(
+ '--eval',
+ type=str,
+ nargs='+',
+ help='evaluation metrics, which depends on the dataset, e.g., "mIoU"'
+ ' for generic datasets, and "cityscapes" for Cityscapes')
+ parser.add_argument('--show', action='store_true', help='show results')
+ parser.add_argument(
+ '--show-dir', help='directory where painted images will be saved')
+ parser.add_argument(
+ '--gpu-collect',
+ action='store_true',
+ help='whether to use gpu to collect results.')
+ parser.add_argument(
+ '--tmpdir',
+ help='tmp directory used for collecting results from multiple '
+ 'workers, available when gpu_collect is not specified')
+ parser.add_argument(
+ '--options', nargs='+', action=DictAction, help='custom options')
+ parser.add_argument(
+ '--eval-options',
+ nargs='+',
+ action=DictAction,
+ help='custom options for evaluation')
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+ return args
+
+
+def main():
+ """Run a checkpoint over the test set, then dump/format/evaluate results.
+
+ Raises:
+ AssertionError: if none of ``--out``, ``--eval``, ``--format-only``,
+ ``--show`` and ``--show-dir`` is given.
+ ValueError: if ``--eval`` and ``--format-only`` are both given, or if
+ ``--out`` does not end with ``.pkl`` / ``.pickle``.
+ """
+ args = parse_args()
+
+ # at least one way of consuming the results has to be requested
+ assert args.out or args.eval or args.format_only or args.show \
+ or args.show_dir, \
+ ('Please specify at least one operation (save/eval/format/show the '
+ 'results / save the results) with the argument "--out", "--eval"'
+ ', "--format-only", "--show" or "--show-dir"')
+
+ if args.eval and args.format_only:
+ raise ValueError('--eval and --format_only cannot be both specified')
+
+ if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+ raise ValueError('The output file must be a pkl file.')
+
+ cfg = mmcv.Config.fromfile(args.config)
+ # command-line options override values from the config file
+ if args.options is not None:
+ cfg.merge_from_dict(args.options)
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+ if args.aug_test:
+ # hard code index
+ # NOTE(review): presumably pipeline[1] is the multi-scale/flip test-time
+ # augmentation step in every config -- confirm against the configs
+ cfg.data.test.pipeline[1].img_ratios = [
+ 0.5, 0.75, 1.0, 1.25, 1.5, 1.75
+ ]
+ cfg.data.test.pipeline[1].flip = True
+ cfg.model.pretrained = None
+ cfg.data.test.test_mode = True
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+
+ # build the dataloader
+ # TODO: support multiple images per gpu (only minor changes are needed)
+ dataset = build_dataset(cfg.data.test)
+ data_loader = build_dataloader(
+ dataset,
+ samples_per_gpu=1,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=distributed,
+ shuffle=False)
+
+ # build the model and load checkpoint
+ model = build_segmentor(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
+ checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
+ # class names and palette are read from the checkpoint meta; a checkpoint
+ # without these keys raises KeyError here
+ model.CLASSES = checkpoint['meta']['CLASSES']
+ model.PALETTE = checkpoint['meta']['PALETTE']
+
+ if not distributed:
+ model = MMDataParallel(model, device_ids=[0])
+ outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)
+ else:
+ model = MMDistributedDataParallel(
+ model.cuda(),
+ device_ids=[torch.cuda.current_device()],
+ broadcast_buffers=False)
+ outputs = multi_gpu_test(model, data_loader, args.tmpdir,
+ args.gpu_collect)
+
+ # the results are consumed on rank 0
+ rank, _ = get_dist_info()
+ if rank == 0:
+ if args.out:
+ print(f'\nwriting results to {args.out}')
+ mmcv.dump(outputs, args.out)
+ kwargs = {} if args.eval_options is None else args.eval_options
+ if args.format_only:
+ dataset.format_results(outputs, **kwargs)
+ if args.eval:
+ dataset.evaluate(outputs, args.eval, **kwargs)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/train.py b/tools/train.py
new file mode 100644
index 0000000000..26e8274b1f
--- /dev/null
+++ b/tools/train.py
@@ -0,0 +1,157 @@
+import argparse
+import copy
+import os
+import os.path as osp
+import time
+
+import mmcv
+import torch
+from mmcv.runner import init_dist
+from mmcv.utils import Config, DictAction
+
+from mmseg import __version__
+from mmseg.apis import set_random_seed, train_segmentor
+from mmseg.datasets import build_dataset
+from mmseg.models import build_segmentor
+from mmseg.utils import collect_env, get_root_logger
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Train a segmentor')
+ parser.add_argument('config', help='train config file path')
+ parser.add_argument('--work_dir', help='the dir to save logs and models')
+ parser.add_argument(
+ '--resume-from', help='the checkpoint file to resume from')
+ parser.add_argument(
+ '--no-validate',
+ action='store_true',
+ help='whether not to evaluate the checkpoint during training')
+ group_gpus = parser.add_mutually_exclusive_group()
+ group_gpus.add_argument(
+ '--gpus',
+ type=int,
+ help='number of gpus to use '
+ '(only applicable to non-distributed training)')
+ group_gpus.add_argument(
+ '--gpu-ids',
+ type=int,
+ nargs='+',
+ help='ids of gpus to use '
+ '(only applicable to non-distributed training)')
+ parser.add_argument('--seed', type=int, default=None, help='random seed')
+ parser.add_argument(
+ '--deterministic',
+ action='store_true',
+ help='whether to set deterministic options for CUDNN backend.')
+ parser.add_argument(
+ '--options', nargs='+', action=DictAction, help='custom options')
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+ return args
+
+
+def main():
+ """Set up config, work dir, logging, model and datasets, then train."""
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+ # command-line options override values from the config file
+ if args.options is not None:
+ cfg.merge_from_dict(args.options)
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+
+ # work_dir is determined in this priority: CLI > segment in file > filename
+ if args.work_dir is not None:
+ # update configs according to CLI args if args.work_dir is not None
+ cfg.work_dir = args.work_dir
+ elif cfg.get('work_dir', None) is None:
+ # use config filename as default work_dir if cfg.work_dir is None
+ cfg.work_dir = osp.join('./work_dirs',
+ osp.splitext(osp.basename(args.config))[0])
+ if args.resume_from is not None:
+ cfg.resume_from = args.resume_from
+ # --gpu-ids wins; otherwise use the first --gpus devices (one by default)
+ if args.gpu_ids is not None:
+ cfg.gpu_ids = args.gpu_ids
+ else:
+ cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+
+ # create work_dir
+ mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
+ # dump config
+ cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
+ # init the logger before other steps
+ # the same timestamp names the log file and is passed to train_segmentor
+ timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
+ log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
+ logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
+
+ # init the meta dict to record some important information such as
+ # environment info and seed, which will be logged
+ meta = dict()
+ # log env info
+ env_info_dict = collect_env()
+ env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
+ dash_line = '-' * 60 + '\n'
+ logger.info('Environment info:\n' + dash_line + env_info + '\n' +
+ dash_line)
+ meta['env_info'] = env_info
+
+ # log some basic info
+ logger.info(f'Distributed training: {distributed}')
+ logger.info(f'Config:\n{cfg.pretty_text}')
+
+ # set random seeds
+ if args.seed is not None:
+ logger.info(f'Set random seed to {args.seed}, deterministic: '
+ f'{args.deterministic}')
+ set_random_seed(args.seed, deterministic=args.deterministic)
+ cfg.seed = args.seed
+ meta['seed'] = args.seed
+ meta['exp_name'] = osp.basename(args.config)
+
+ model = build_segmentor(
+ cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
+
+ logger.info(model)
+
+ datasets = [build_dataset(cfg.data.train)]
+ # a two-entry workflow also builds the val dataset, using a copy of the
+ # val config whose pipeline is replaced by the training pipeline
+ if len(cfg.workflow) == 2:
+ val_dataset = copy.deepcopy(cfg.data.val)
+ val_dataset.pipeline = cfg.data.train.pipeline
+ datasets.append(build_dataset(val_dataset))
+ if cfg.checkpoint_config is not None:
+ # save mmseg version, config file content and class names in
+ # checkpoints as meta data
+ cfg.checkpoint_config.meta = dict(
+ mmseg_version=__version__,
+ config=cfg.pretty_text,
+ CLASSES=datasets[0].CLASSES,
+ PALETTE=datasets[0].PALETTE)
+ # add an attribute for visualization convenience
+ model.CLASSES = datasets[0].CLASSES
+ train_segmentor(
+ model,
+ datasets,
+ cfg,
+ distributed=distributed,
+ validate=(not args.no_validate),
+ timestamp=timestamp,
+ meta=meta)
+
+
+if __name__ == '__main__':
+ main()