JapaneseStableDiffusionを使ったテクスチャ作成

2022年10月1日 21:23

先週は英語版のStableDiffusionでテクスチャを作成した。
今回は日本語版でもできるかを検討してみた。
内容は前回とほぼ同じ。使用するパッケージが変わったため、手続きが微妙に異なる。

設定

前回同様にHugging Faceトークンは取得しておく。(前回と同じもので可)
japanese-stable-diffusionにアクセスしライセンスを確認し同意する。

GoogleドライブからGoogle Colaboratoryのノートブックを作成する。
「ランタイム」→「ランタイムのタイプを変更」→「ハードウェアアクセラレータ」を「GPU」に変更。

パッケージをインストールする。
日本語対応したjapanese-stable-diffusionのパッケージを使う。

# パッケージのインストール
!pip install git+https://github.com/rinnakk/japanese-stable-diffusion

取得したトークンを貼り付ける。

# Hugging Face access token: replace the placeholder with the token you obtained.
# It is passed to from_pretrained(use_auth_token=...) when the pipelines are loaded.
YOUR_TOKEN = "[トークン]"

Seamless Texture

前回同様、torchのモジュール(Conv2d)にパッチを当ててシームレス用に変換する。
特に変なものは作っていないが、NSFW判定に引っかかるものが多かったので、NSFW判定回避も行った。

# ライブラリインポート
import torch
from torch import autocast
# JapaneseStableDiffusionパイプライン
from diffusers import LMSDiscreteScheduler
from japanese_stable_diffusion import JapaneseStableDiffusionPipeline

# Helper that converts a convolution class to seamless (tileable) output
def patch_conv(cls):
    """Patch ``cls.__init__`` so every new instance uses circular padding.

    The constructor of ``cls`` is replaced in place by a wrapper that forces
    the keyword argument ``padding_mode='circular'`` before delegating to the
    original constructor. Instances created before the patch are unaffected.

    Calling this function more than once on the same class is safe: a class
    whose constructor is already patched is left untouched, so re-running the
    notebook cell does not stack wrappers.

    Args:
        cls: Class whose ``__init__`` accepts a ``padding_mode`` keyword
            argument (the article applies it to ``torch.nn.Conv2d``).

    Returns:
        None. ``cls`` is modified in place.
    """
    if getattr(cls.__init__, "_forces_circular_padding", False):
        # Already patched (e.g. the cell was executed again): nothing to do.
        return

    original_init = cls.__init__

    def __init__(self, *args, **kwargs):
        # Assign into kwargs instead of passing a second keyword: a
        # caller-supplied padding_mode is overridden rather than triggering
        # "got multiple values for keyword argument 'padding_mode'".
        kwargs["padding_mode"] = "circular"
        return original_init(self, *args, **kwargs)

    # Marker checked above to keep the patch idempotent.
    __init__._forces_circular_padding = True
    cls.__init__ = __init__

# Apply the seamless patch: Conv2d layers built after this call use circular padding
patch_conv(torch.nn.Conv2d)

# Scheduler handed to the pipeline
scheduler = LMSDiscreteScheduler(
    num_train_timesteps=1000,
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
)

# Load the pipeline with the scheduler above, then move it to the GPU
pipe = JapaneseStableDiffusionPipeline.from_pretrained(
    "rinna/japanese-stable-diffusion",
    use_auth_token=YOUR_TOKEN,
    scheduler=scheduler,
)
pipe = pipe.to("cuda")

# Overwrite safety_checker (bypasses the NSFW check): the replacement returns
# the images unchanged together with False
pipe.safety_checker = lambda images, **kwargs: (images, False)

パラメータを設定し画像を生成する。
前回同様ブロッコリーカリフラワーのSeamless Textureを作成する。

prompt = "ブロッコリー　カリフラワー"

# Random generator seeded with 0
seed_generator = torch.Generator("cuda").manual_seed(0)

# Generate the image
with autocast("cuda"):
    result = pipe(
        prompt,                       # prompt
        width=512,                    # width of the generated image
        height=512,                   # height of the generated image
        guidance_scale=7.5,           # similarity between image and prompt (0-20)
        num_inference_steps=50,       # number of steps spent on generation
        generator=seed_generator,     # random seed generator
    )
    image = result.images[0]
image

# Save the image
image.save("ファイル名")

Matcap Texture

前回同様Inpaintパイプラインを使用する。
余談だが、Img2Imgパイプラインも日本語対応していた。

# ライブラリインポート
import torch
from torch import autocast
from PIL import Image
# JapaneseStableDiffusionInpaintパイプライン
from diffusers import LMSDiscreteScheduler
from japanese_stable_diffusion import JapaneseStableDiffusionInpaintPipeline

# Scheduler handed to the pipeline
scheduler = LMSDiscreteScheduler(
    num_train_timesteps=1000,
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
)

# Load the inpaint pipeline with the scheduler above, then move it to the GPU
pipe = JapaneseStableDiffusionInpaintPipeline.from_pretrained(
    "rinna/japanese-stable-diffusion",
    use_auth_token=YOUR_TOKEN,
    scheduler=scheduler,
)
pipe = pipe.to("cuda")

# Overwrite safety_checker (bypasses the NSFW check): the replacement returns
# the images unchanged together with False
pipe.safety_checker = lambda images, **kwargs: (images, False)

前回と同じ画像を使用した。

# input
import os

w, h = 512, 512 # width, height
input_path = "/content/drive/MyDrive/Colab Notebooks/xxx" # folder that holds the images

prompt = "matcap,glitter,diamond"
img_name = "MC Basic 1.png"   # source image file name
mask_name = "circle_mask.png" # mask image file name
# Join with os.path.join so the separator between folder and file name is
# always present, whether or not input_path ends with "/".
# Both images are converted to RGB and resized to (w, h).
init_image = Image.open(os.path.join(input_path, img_name)).convert("RGB").resize((w, h))
mask_image = Image.open(os.path.join(input_path, mask_name)).convert("RGB").resize((w, h))

# Generate the image
# The pipeline is bound to `pipe` and the result is stored in `image`,
# the name that is displayed and saved below.
with autocast("cuda"):
    image = pipe(
        prompt=prompt,                                    # prompt
        init_image=init_image,                            # input image
        mask_image=mask_image,                            # mask image
        strength=0.8,                                     # difference between input and output image (0.0-1.0) #0.75
        guidance_scale=7.5,                               # similarity between prompt and output image (0-20) #7.5
        num_inference_steps=50,                           # number of steps spent on generation (50)
        generator=torch.Generator("cuda").manual_seed(0), # random seed
        ).images[0]
image

# Save the image
# NOTE: "ファイル名" is a placeholder; replace it with a real file name that has
# an extension (e.g. .png), since PIL picks the format from the extension.
image.save("ファイル名")

おおむね同じようなものが生成された。

この記事が気に入ったらサポートをしてみませんか？