先看效果:
文生图
图生图
来看一下怎么构造自己的图片生成应用。
1 申请腾讯云AI绘画内测名额
链接:https://console.cloud.tencent.com/aiart
2 获取腾讯云的API key
链接:https://console.cloud.tencent.com/cam/capi
点击“新建密钥”
会生成SecretId和SecretKey
3 在代码运行环境配置密钥的环境变量
Windows配置环境变量:
setx TENCENTCLOUD_SECRET_ID xxx
setx TENCENTCLOUD_SECRET_KEY xxx
将xxx分别换成第2步生成的SecretId和SecretKey(注意:setx设置的环境变量只在新打开的命令行窗口中生效)
Linux配置环境变量:
export TENCENTCLOUD_SECRET_ID="xxx"
export TENCENTCLOUD_SECRET_KEY="xxx"
4 编写代码
代码实现的功能是获取页面输入的内容,传给腾讯云,由腾讯云生成图片并返回其base64编码,再通过base64解码还原出图片。
点击下方的链接进入文生图的API调试页面:
https://console.cloud.tencent.com/api/explorer?Product=aiart&Version=2022-12-29&Action=TextToImage
可以输入自己希望修改的参数,发起调用,即可得到生成图片的base64编码;可以找一个base64编码转图片的网站,把编码还原成图片查看效果。
如果没有问题,就可以点击右上角的“代码生成”,选择自己喜欢的开发语言,我选的是Python。
图生图也类似,API调试页面如下:
https://console.cloud.tencent.com/api/explorer?Product=aiart&Version=2022-12-29&Action=ImageToImage
最后,在ChatGPT的帮助下,通过Gradio实现了前端页面,代码如下:
import json
import os
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.aiart.v20221229 import aiart_client, models
import base64
from PIL import Image
import time
import gradio as gr
import logging
import io
# UI style name -> Tencent Cloud style code for text-to-image.
# Insertion order is also the order of the dropdown choices in the UI.
DEFAULT_STYLE_CODE = "000"
TEXT_STYLE_CODES = {
    "不限定风格": "000",
    "素描画": "112",
    "剪纸风格": "108",
    "日系动漫": "201",
    "2.5D人像": "110",
    "肖像画": "111",
    "赛博朋克": "113",
    "科幻风格": "114",
    "游戏卡通手绘": "301",
    "水墨画": "101",
    "印象派": "109",
}
# UI style name -> style code for image-to-image.
IMAGE_STYLE_CODES = {
    "不限定风格": "000",
    "日系动漫": "201",
    "美系动漫": "202",
    "唯美古风": "203",
    "水彩画": "106",
}
# UI size name -> "width:height" resolution string sent to the API.
DEFAULT_RESOLUTION = "768:768"
SIZE_RESOLUTIONS = {
    "方图": "768:768",
    "竖图": "768:1024",
    "横图": "1024:768",
}
IMAGE_DIR = "image"
LOG_DIR = "log"
LOG_FILE = "log/1.log"


def _prepare_storage():
    """Create the output folders and set up file logging (safe to call repeatedly)."""
    os.makedirs(IMAGE_DIR, exist_ok=True)
    os.makedirs(LOG_DIR, exist_ok=True)
    # Must run before the first logging.* call: the module-level logging
    # helpers install a default stderr handler when the root logger has none,
    # after which this basicConfig call would be silently ignored.
    logging.basicConfig(filename=LOG_FILE, filemode="a", level=logging.INFO)


def _new_client():
    """Build an AiartClient from the TENCENTCLOUD_SECRET_ID/KEY environment variables."""
    cred = credential.Credential(
        os.environ.get("TENCENTCLOUD_SECRET_ID"),
        os.environ.get("TENCENTCLOUD_SECRET_KEY"),
    )
    http_profile = HttpProfile()
    http_profile.endpoint = "aiart.tencentcloudapi.com"
    client_profile = ClientProfile()
    client_profile.httpProfile = http_profile
    return aiart_client.AiartClient(cred, "ap-shanghai", client_profile)


def _store_result(result_image, file_prefix, timestamp, prompt, style, size):
    """Decode the base64 image, write it under IMAGE_DIR, log the request, return the path."""
    img_name = IMAGE_DIR + "/" + file_prefix + timestamp + ".jpg"
    with open(img_name, "wb") as f:
        f.write(base64.b64decode(str(result_image)))
    # Lazy %-formatting: a None style/size (nothing selected in the dropdown)
    # must not raise after the image has already been generated.
    logging.info("%s,%s,%s,%s", prompt, style, size, img_name)
    return img_name


def _array_to_base64_png(input_image):
    """Encode the uploaded image array as a base64 PNG string.

    Assumes ``input_image`` is the RGB NumPy array Gradio passes for an image
    input; raises ValueError when no image was uploaded.
    """
    if input_image is None:
        raise ValueError("input image is required")
    img = Image.fromarray(input_image.astype('uint8'), 'RGB')
    buffer = io.BytesIO()
    img.save(buffer, format='PNG')
    return base64.b64encode(buffer.getvalue()).decode('utf-8')


def text_to_image(Prompt, Styles, Size):
    """Generate an image from a text prompt via the TextToImage API.

    Args:
        Prompt: text description of the desired image.
        Styles: UI style name; unknown values fall back to style code "000".
        Size: UI size name; unknown values fall back to "768:768".

    Returns:
        Path of the saved image file ("image/text_to_img<timestamp>.jpg").

    Raises:
        TencentCloudSDKException: if the API call fails (logged, then re-raised).
    """
    timestamp = str(time.time_ns())
    _prepare_storage()
    params = {
        "Prompt": Prompt,
        "Styles": [TEXT_STYLE_CODES.get(Styles, DEFAULT_STYLE_CODE)],
        "LogoAdd": 0,
        "ResultConfig": {
            "Resolution": SIZE_RESOLUTIONS.get(Size, DEFAULT_RESOLUTION)
        }
    }
    client = _new_client()
    req = models.TextToImageRequest()
    req.from_json_string(json.dumps(params))
    try:
        resp = client.TextToImage(req)
    except TencentCloudSDKException:
        logging.exception("TextToImage request failed")
        raise
    return _store_result(resp.ResultImage, "text_to_img", timestamp,
                         Prompt, Styles, Size)


def image_to_image(encoded_img, img_prompt, style_dropdown, size_dropdown, strength):
    """Generate an image from an input image via the ImageToImage API.

    Args:
        encoded_img: base64-encoded input image.
        img_prompt: text description guiding the generation.
        style_dropdown: UI style name; unknown values fall back to "000".
        size_dropdown: UI size name; unknown values fall back to "768:768".
        strength: value sent as the API's "Strength" field (the UI slider
            offers 0 to 1).

    Returns:
        Path of the saved image file ("image/img_to_img<timestamp>.jpg").

    Raises:
        TencentCloudSDKException: if the API call fails (logged, then re-raised).
    """
    timestamp = str(time.time_ns())
    _prepare_storage()
    params = {
        "InputImage": encoded_img,
        "Prompt": img_prompt,
        "Styles": [IMAGE_STYLE_CODES.get(style_dropdown, DEFAULT_STYLE_CODE)],
        "ResultConfig": {
            "Resolution": SIZE_RESOLUTIONS.get(size_dropdown, DEFAULT_RESOLUTION)
        },
        "LogoAdd": 0,
        "Strength": strength
    }
    client = _new_client()
    req = models.ImageToImageRequest()
    req.from_json_string(json.dumps(params))
    try:
        resp = client.ImageToImage(req)
    except TencentCloudSDKException:
        logging.exception("ImageToImage request failed")
        raise
    return _store_result(resp.ResultImage, "img_to_img", timestamp,
                         img_prompt, style_dropdown, size_dropdown)


def web():
    """Build the two-tab Gradio app (text-to-image, image-to-image) and launch it."""

    def image_image_with_gradio(input_image, img_prompt, style_dropdown, size_dropdown, strength):
        # The API takes the input image as base64, so encode the upload first.
        encoded_img = _array_to_base64_png(input_image)
        return image_to_image(encoded_img, img_prompt, style_dropdown, size_dropdown, strength)

    # Tab 1: text prompt + style + size -> generated image.
    text_interface = gr.Interface(
        fn=text_to_image,
        inputs=[
            gr.inputs.Textbox(lines=5, label="输入描述(中文)"),
            gr.inputs.Dropdown(choices=list(TEXT_STYLE_CODES), label="图片风格"),
            gr.inputs.Dropdown(choices=list(SIZE_RESOLUTIONS), label="图片规格"),
        ],
        outputs=gr.outputs.Image(type="pil", label="生成的图片"),
        title="文字生成图片",
        description="输入文字描述,AI帮你生成图片"
    )

    # Tab 2: uploaded image + prompt + style + size + strength -> generated image.
    image_interface = gr.Interface(
        fn=image_image_with_gradio,
        inputs=[
            gr.inputs.Image(),
            gr.inputs.Textbox(lines=3, label="输入描述(中文)"),
            gr.inputs.Dropdown(choices=list(IMAGE_STYLE_CODES), label="图片风格"),
            gr.inputs.Dropdown(choices=list(SIZE_RESOLUTIONS), label="图片规格"),
            gr.inputs.Slider(0, 1, default=0.50, label="生成自由度(值越小,生成图和原图越接近)"),
        ],
        outputs=gr.outputs.Image(type="pil", label="生成的图片"),
        title="图片生成图片",
        description="上传图片,AI帮你生成不同风格的图片"
    )

    # Combine both interfaces into one tabbed application.
    # NOTE: replace the placeholder password before exposing the service.
    gr.TabbedInterface(
        [text_interface, image_interface],
        ["文字生成图片", "图片生成图片"],
        title="AI图片生成器"
    ).launch(server_name='0.0.0.0', server_port=8888, auth=("admin", "xxxxxx"))


if __name__ == "__main__":
    # Kept from the original script. SDK errors raised while serving requests
    # occur inside the Gradio callbacks and are logged where the API is called.
    try:
        web()
    except TencentCloudSDKException as err:
        print(err)
代码写得不太好,大佬们可以帮忙优化一下。
5 安装依赖
pip install tencentcloud-sdk-python pillow gradio
6 创建文件夹
在代码文件当前路径创建图片保存文件夹和日志文件夹
mkdir image
mkdir log
7 运行Python程序
python3 texttoimage.py
8 使用
在浏览器中访问 http://服务器IP:8888,输入代码中auth参数设置的用户名和密码登录,进入界面
在“文字生成图片”页面,输入描述,选择图片风格和图片规格,如下图:
在“图片生成图片”页面,上传图片,输入描述,选择图片风格和图片规格,如下图:
对比如下: