本教程将向您展示如何使用ChatGPT函数来执行与Amazon S3存储桶相关的任务的示例。这篇教程涵盖了S3存储桶的关键功能,包括运行简单的列出命令、在所有存储桶中搜索特定文件、将文件上传到存储桶以及从存储桶下载文件。OpenAI Chat API可以理解用户的指令,生成自然语言响应,并根据用户输入提取适当的函数调用。
先决条件
在开始之前,请确保您具备以下条件:
- 生成一个具有S3存储桶读写权限的AWS访问密钥,并将它们与OpenAI密钥一起存储在本地环境文件(.env)中。环境文件的格式如下:
AWS_ACCESS_KEY_ID=<您的密钥>
AWS_SECRET_ACCESS_KEY=<您的密钥>
OPENAI_API_KEY=<您的密钥>
- 安装所需的Python模块。您可以使用以下命令来安装它们:
!pip install openai boto3 tenacity python-dotenv
代码示例
以下是使用ChatGPT与AWS S3存储桶交互的示例代码。
import openai
import json
import boto3
import os
import datetime
from urllib.request import urlretrieve
from dotenv import load_dotenv
# Load environment variables (AWS + OpenAI credentials) from the .env file
load_dotenv()
# Set the OpenAI API key read from the environment
openai.api_key = os.environ.get("OPENAI_API_KEY")
GPT_MODEL = "gpt-3.5-turbo"
# Create the S3 client; boto3 picks up AWS credentials from the environment
s3_client = boto3.client('s3')
# Function schemas that describe the available S3 operations to the GPT model.
# NOTE: the "description" strings are sent to the API at runtime and are kept as-is.
functions = [
    # List all available S3 buckets
    {
        "name": "list_buckets",
        "description": "列出所有可用的S3存储桶",
        "parameters": {
            "type": "object",
            "properties": {}
        }
    },
    # List the objects/files inside a given S3 bucket
    {
        "name": "list_objects",
        "description": "列出给定S3存储桶内的对象或文件",
        "parameters": {
            "type": "object",
            "properties": {
                "bucket": {"type": "string", "description": "S3存储桶的名称"},
                "prefix": {"type": "string", "description": "S3存储桶中的文件夹路径"},
            },
            "required": ["bucket"],
        },
    },
    # Download a specific file from an S3 bucket to a local destination folder
    {
        "name": "download_file",
        "description": "从S3存储桶中下载特定文件到本地目标文件夹",
        "parameters": {
            "type": "object",
            "properties": {
                "bucket": {"type": "string", "description": "S3存储桶的名称"},
                "key": {"type": "string", "description": "S3存储桶中文件的路径"},
                "directory": {"type": "string", "description": "本地目标目录,由用户指定。"},
            },
            "required": ["bucket", "key", "directory"],
        }
    },
    # Upload a file (local path or remote URL) to an S3 bucket
    {
        "name": "upload_file",
        "description": "将文件上传到S3存储桶",
        "parameters": {
            "type": "object",
            "properties": {
                "source": {"type": "string", "description": "本地源文件路径或远程URL"},
                "bucket": {"type": "string", "description": "S3存储桶的名称"},
                "key": {"type": "string", "description": "S3存储桶中文件的路径"},
                "is_remote_url": {"type": "boolean", "description": "提供的源是否为URL(True)还是本地路径(False)"},
            },
            "required": ["source", "bucket", "key", "is_remote_url"],
        }
    },
    # Search for a specific file name inside S3 buckets
    {
        "name": "search_s3_objects",
        "description": "在S3存储桶内搜索特定文件名",
        "parameters": {
            "type": "object",
            "properties": {
                "search_name": {"type": "string", "description": "要搜索的文件名"},
                "bucket": {"type": "string", "description": "S3存储桶的名称"},
                "prefix": {"type": "string", "description": "S3存储桶中的文件夹路径"},
                "exact_match": {"type": "boolean", "description": "如果搜索应匹配完整文件名,请将exact_match设置为True。将exact_match设置为False以比较文件名字符串的一部分(文件包含)"}
            },
            "required": ["search_name"],
        },
    }
]
# 创建用于将S3操作与函数连接的辅助函数
def datetime_converter(obj):
    """json.dumps `default` hook: render datetime objects as ISO-8601 strings."""
    if not isinstance(obj, datetime.datetime):
        raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")
    return obj.isoformat()
def list_buckets():
    """Return every S3 bucket for this account as a JSON string."""
    buckets = s3_client.list_buckets()['Buckets']
    # Bucket records contain CreationDate datetimes, hence the converter
    return json.dumps(buckets, default=datetime_converter)
def list_objects(bucket, prefix=''):
    """List objects under *prefix* in *bucket*; returns a JSON string ('[]' if empty)."""
    listing = s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix)
    # 'Contents' is absent when the bucket/prefix has no objects
    contents = listing.get('Contents', [])
    return json.dumps(contents, default=datetime_converter)
def download_file(bucket, key, directory):
    """Download s3://bucket/key into the local *directory*; returns a JSON status string."""
    # Keep the original file name when saving locally
    local_path = os.path.join(directory, os.path.basename(key))
    s3_client.download_file(bucket, key, local_path)
    return json.dumps({"status": "success", "bucket": bucket, "key": key, "destination": local_path})
def upload_file(source, bucket, key, is_remote_url=False):
    """Upload a file to s3://bucket/key and return a JSON status string.

    Args:
        source: Local file path, or a remote URL when is_remote_url is True.
        bucket: Destination S3 bucket name.
        key: Destination object key inside the bucket.
        is_remote_url: If True, *source* is fetched to the current working
            directory via urlretrieve before being uploaded.
    """
    if is_remote_url:
        # Fetch the remote resource locally first; keep its base file name
        file_name = os.path.basename(source)
        urlretrieve(source, file_name)
        source = file_name
    # BUG FIX: the client identifier was split across two lines
    # ("s3" / "_client.upload_file"), a syntax error that broke the script.
    s3_client.upload_file(source, bucket, key)
    return json.dumps({"status": "success", "source": source, "bucket": bucket, "key": key})
def search_s3_objects(search_name, bucket=None, prefix='', exact_match=True):
    """Search S3 objects by name (case-insensitive) and return matches as JSON.

    When *bucket* is None every bucket in the account is searched. With
    exact_match=True the whole key must equal *search_name*; otherwise a
    substring match is used.
    """
    needle = search_name.lower()
    if bucket is not None:
        target_buckets = [bucket]
    else:
        # No bucket given: enumerate all buckets via the sibling helper
        target_buckets = [info["Name"] for info in json.loads(list_buckets())]
    matches = []
    for bucket_name in target_buckets:
        objects = json.loads(list_objects(bucket_name, prefix))
        if exact_match:
            hits = [obj for obj in objects if obj['Key'].lower() == needle]
        else:
            hits = [obj for obj in objects if needle in obj['Key'].lower()]
        matches.extend({"Bucket": bucket_name, "Object": obj} for obj in hits)
    return json.dumps(matches)
# Dispatch table: function names exposed to the model -> local callables
available_functions = {
    "list_buckets": list_buckets,
    "list_objects": list_objects,
    "download_file": download_file,
    "upload_file": upload_file,
    "search_s3_objects": search_s3_objects
}
# ChatGPT completion wrapper
def chat_completion_request(messages, functions=None, function_call='auto',
                            model_name=GPT_MODEL):
    """Call the OpenAI chat completion API, optionally advertising function specs."""
    request_kwargs = {"model": model_name, "messages": messages}
    if functions is not None:
        # Only pass the function parameters when schemas were supplied
        request_kwargs["functions"] = functions
        request_kwargs["function_call"] = function_call
    return openai.ChatCompletion.create(**request_kwargs)
# Main conversation driver
def run_conversation(user_input, topic="S3 bucket functions.", is_log=False):
    """Run one user turn: let the model answer directly or call an S3 helper,
    then ask the model to phrase the final reply from the helper's output.
    """
    system_message=f"不要假设要插入函数的值。如果用户请求不明确,请请求澄清。如果用户要求与{topic}无关的问题,请告诉他您的范围是{topic}。"
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_input},
    ]
    # First pass: the model either answers or requests a function call
    first = chat_completion_request(messages, functions=functions)
    if is_log:
        print(first['choices'])
    assistant_message = first['choices'][0]['message']
    call = assistant_message.get("function_call")
    if not call:
        # No tool needed -- return the model's direct answer
        return assistant_message['content']
    # Execute the requested S3 helper with the model-provided arguments
    fn_name = call['name']
    fn_args = json.loads(call['arguments'])
    fn_result = available_functions[fn_name](**fn_args)
    # Feed the tool output back so the model can summarize the result
    messages.append(assistant_message)
    messages.append({
        "role": "function",
        "name": fn_name,
        "content": fn_result,
    })
    second = chat_completion_request(messages)
    return second['choices'][0]['message']['content']
# Try out the S3 bucket assistant (prompts are runtime strings; replace the
# <...> placeholders with real values before running)
# Listing and searching
print(run_conversation('列出我的S3存储桶'))
search_file = '<文件名>'
print(run_conversation(f'在所有存储桶中搜索文件{search_file}'))
search_word = '<文件名部分>'
bucket_name = '<存储桶名称>'
print(run_conversation(f'在{bucket_name}中搜索包含{search_word}的文件'))
# Check that the model declines unrelated tasks
print(run_conversation('今天天气如何'))
# Download a file
search_file = '<文件名>'
bucket_name = '<存储桶名称>'
local_directory = '<本地目录路径>'
print(run_conversation(f'从{bucket_name}存储桶中下载{search_file}到{local_directory}目录'))
# Upload a file
local_file = '<文件名>'
bucket_name = '<存储桶名称>'
print(run_conversation(f'将{local_file}上传到{bucket_name}存储桶'))
结论
通过本教程,您已经学会了如何使用ChatGPT自动化AWS S3存储桶任务。这个互动的示例展示了如何连接ChatGPT的自然语言处理能力和AWS S3存储桶的功能,以执行列出、搜索、下载和上传文件等操作。这种自动化可以节省大量时间,提高工作效率。