index.html

<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <!-- Meta tags for social media banners, these should be filled in appropriatly as they are your "business card" -->
  <!-- Replace the content tag with appropriate information -->
  <meta name="description" content="AnyText">
  <meta property="og:title" content="AnyText"/>
  <meta property="og:description" content="ReplaceAnything as you want"/>
  <meta property="og:url" content="URL OF THE WEBSITE"/>
  <!-- Path to banner image, should be in the path listed below. Optimal dimenssions are 1200X630-->
  <meta property="og:image" content="static/images/logo.jpg" />
  <meta property="og:image:width" content="560"/>
  <meta property="og:image:height" content="560"/>


  <meta name="twitter:title" content="TWITTER BANNER TITLE META TAG">
  <meta name="twitter:description" content="TWITTER BANNER DESCRIPTION META TAG">
  <!-- Path to banner image, should be in the path listed below. Optimal dimenssions are 1200X600-->
  <meta name="twitter:image" content="static/images/your_twitter_banner_image.png">
  <meta name="twitter:card" content="summary_large_image">
  <!-- Keywords for your paper to be indexed by-->
  <meta name="keywords" content="KEYWORDS SHOULD BE PLACED HERE">
  <meta name="viewport" content="width=device-width, initial-scale=1">


  <title>AnyText</title>
  <link rel="icon" type="image/x-icon" href="static/images/logo.jpg">
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
  rel="stylesheet">

  <link rel="stylesheet" href="static/css/bulma.min.css">
  <link rel="stylesheet" href="static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
  href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="static/css/index.css">

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
  <script defer src="static/js/fontawesome.all.min.js"></script>
  <script src="static/js/bulma-carousel.min.js"></script>
  <script src="static/js/bulma-slider.min.js"></script>
  <script src="static/js/index.js"></script>
</head>
<body>


  <section class="hero">
    <div class="hero-body">
      <div class="container is-max-desktop">
        <div class="columns is-centered">
          <div class="column has-text-centered">
            <h1 class="title is-1 publication-title">AnyText: Multilingual Visual Text Generation And Editing</h1>
            <div class="is-size-5 publication-authors">
              <!-- Paper authors -->
              <span class="author-block">
                <a href="https://openreview.net/profile?id=~Yuxiang_Tuo2" target="_blank">Yuxiang Tuo</a>,</span>
                <span class="author-block">
                  <a href="https://openreview.net/profile?id=~Wangmeng_Xiang1" target="_blank">Wangmeng Xiang</a>,</span>
                  <span class="author-block">
                    <a href="https://openreview.net/profile?id=~Jun-Yan_He2" target="_blank">Jun-Yan He</a>,
                  </span>
                      <span class="author-block">
                        <a href="https://openreview.net/profile?id=~Yifeng_Geng2" target="_blank">Yifeng Geng</a>,</span>
                          <span class="author-block">
                            <a href="https://openreview.net/profile?id=~Xuansong_Xie1" target="_blank">Xuansong Xie</a>,</span>
                  </div>

                  <div class="is-size-5 publication-authors">
                    <span class="author-block">Institute for Intelligent Computing, Alibaba Group</span>
                  </div>

                  <div class="column has-text-centered">
                    <div class="publication-links">
                         <!-- Arxiv PDF link -->
                      <span class="link-block">
                        <a href="https://arxiv.org/abs/2311.03054" target="_blank"
                        class="external-link button is-normal is-rounded is-dark">
                        <span class="icon">
                          <i class="fas fa-file-pdf"></i>
                        </span>
                        <span>Paper</span>
                      </a>
                    </span>

                  <!-- Github link -->
                  <span class="link-block">
                    <a href="https://github.com/tyxsspa/AnyText" target="_blank"
                    class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fab fa-github"></i>
                    </span>
                    <span>Code</span>
                  </a>
                </span>

                <!-- ModelScope Link -->
                <span class="link-block">
                  <a href="https://modelscope.cn/models/damo/cv_anytext_text_generation_editing/summary" target="_blank"
                  class="external-link button is-normal is-rounded is-dark">
                  <span>ModelScope Demo</span>
                </a>
              </span>
                      
                <!-- HuggingFace Link -->
                <span class="link-block">
                  <a href="https://huggingface.co/spaces/modelscope/AnyText" target="_blank"
                  class="external-link button is-normal is-rounded is-dark">
                  <span>HuggingFace Demo</span>
                </a>
              </span>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>


<!-- Paper abstract -->
<section class="section hero is-light">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            Diffusion model based Text-to-Image has achieved impressive achievements recently. Although current technology for synthesizing images is highly advanced and capable of generating images with high fidelity, it is still possible to give the show away when focusing on the text area in the generated image, as synthesized text often contains blurred, unreadable, or incorrect characters, making visual text generation one of the most challenging issues in this field. To address this issue, we introduce AnyText, a diffusion-based multilingual visual text generation and editing model, that focuses on rendering accurate and coherent text in the image. AnyText comprises a diffusion pipeline with two primary elements: an auxiliary latent module and a text embedding module. The former uses inputs like text glyph, position, and masked image to generate latent features for text generation or editing. The latter employs an OCR model for encoding stroke data as embeddings, which blend with image caption embeddings from the tokenizer to generate texts that seamlessly integrate with the background. We employed text-control diffusion loss and text perceptual loss for training to further enhance writing accuracy. AnyText can write characters in multiple languages, to the best of our knowledge, this is the first work to address multilingual visual text generation. It is worth mentioning that AnyText can be plugged into existing diffusion models from the community for rendering or editing text accurately. After conducting extensive evaluation experiments, our method has outperformed all other approaches by a significant margin. Additionally, we contribute the first large-scale multilingual text images dataset, AnyWord-3M, containing 3 million image-text pairs with OCR annotations in multiple languages. Based on AnyWord-3M dataset, we propose AnyText-benchmark for the evaluation of visual text generation accuracy and quality. Our project will be open-sourced soon to improve and promote the development of text generation technology.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- End paper abstract -->


<!-- Image carousel -->
<section class="hero is-small">
  <div class="hero-body">
    <div class="container">
      <div id="results-carousel" class="carousel results-carousel">
       <div class="item">
        <!-- Your image here -->
        <img src="static/images/showcase1.jpg" alt="MY ALT TEXT"/>
      </div>
      <div class="item">
        <!-- Your image here -->
        <img src="static/images/showcase2.jpg" alt="MY ALT TEXT"/>
      </div>
      <div class="item">
        <!-- Your image here -->
        <img src="static/images/showcase3.jpg" alt="MY ALT TEXT"/>
      </div>
      <div class="item">
        <!-- Your image here -->
        <img src="static/images/showcase4.jpg" alt="MY ALT TEXT"/>
      </div>
      <div class="item">
        <!-- Your image here -->
        <img src="static/images/showcase5.jpg" alt="MY ALT TEXT"/>
      </div>
      <div class="item">
        <!-- Your image here -->
        <img src="static/images/showcase6.jpg" alt="MY ALT TEXT"/>
      </div>
      <div class="item">
        <!-- Your image here -->
        <img src="static/images/showcase7.jpg" alt="MY ALT TEXT"/>
      </div>
      <div class="item">
        <!-- Your image here -->
        <img src="static/images/showcase8.jpg" alt="MY ALT TEXT"/>
      </div>
  </div>
</div>
</div>
</section>
<!-- End image carousel -->

  <footer class="footer">
  <div class="container">
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">

          <p>
            This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank">Academic Project Page Template</a> which was adopted from the <a href="https://nerfies.github.io" target="_blank">Nerfies</a> project page.
            You are free to borrow the of this website, we just ask that you link back to this page in the footer. <br> This website is licensed under a <a rel="license"  href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative
            Commons Attribution-ShareAlike 4.0 International License</a>.
          </p>

        </div>
      </div>
    </div>
  </div>
</footer>

<!-- Statcounter tracking code -->
  
<!-- You can add a tracker to track page visits by creating an account at statcounter.com -->

    <!-- End of Statcounter Code -->

  </body>
  </html>