REBEL 模型的本地部署与运行,用于三元组抽取任务(事件抽取、知识抽取)

1.服务器

CUDA 11.7

2.依赖

conda create -n rebel_env python=3.10 -y
conda activate rebel_env

# 安装 PyTorch(建议与 CUDA 11.7 配合)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117

# 安装 Transformers 和基础工具包
pip install transformers==4.33.2
pip install pandas tqdm

# 将 NumPy 限制在 1.x:cu117 对应的 PyTorch 预编译包基于 NumPy 1.x 构建,搭配 NumPy 2.x 可能报错
pip install "numpy<2"

3.加载模型 

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Load the REBEL tokenizer and seq2seq weights, then move the model to the GPU
MODEL_NAME = "Babelscape/rebel-large"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model = model.cuda()

# Structural markers REBEL emits to delimit the parts of each triplet.
_ROLE_MARKERS = ("<triplet>", "<subj>", "<obj>")
# Sequence-level tokens that carry no triplet information.
_SEQUENCE_TOKENS = ("<s>", "</s>", "<pad>")


def _strip_sequence_tokens(text):
    """Remove <s>, </s> and <pad> from *text* and trim surrounding whitespace."""
    for token in _SEQUENCE_TOKENS:
        text = text.replace(token, "")
    return text.strip()


def _parse_unmarked_output(text):
    """Best-effort parse of marker-less output (phrases separated by two spaces).

    REBEL linearizes each triplet as head, tail, relation, so every group of
    three phrases is reordered into (head, relation, tail). Without the
    markers, a head that is shared by several tails cannot be recognized.
    """
    phrases = [p.strip() for p in text.strip().split("  ") if p.strip()]
    return [
        (phrases[i], phrases[i + 2], phrases[i + 1])
        for i in range(0, len(phrases) - 2, 3)
    ]


def extract_triplets_from_texts(texts):
    """Run REBEL over each input text and collect the extracted triplets.

    Uses the module-level ``tokenizer`` and ``model`` objects.

    Args:
        texts: Iterable of input strings; each one is processed on its own.

    Returns:
        A list with one dict per input text, holding:
        ``"text"`` - the input string,
        ``"decoded_output"`` - the generated sequence with the
        ``<triplet>``/``<subj>``/``<obj>`` markers kept and the
        ``<s>``/``</s>``/``<pad>`` tokens removed,
        ``"triplets"`` - a list of ``(head, relation, tail)`` tuples.
    """
    results = []

    for text in texts:
        # REBEL takes the raw sentence as input (see the model card), so no
        # instruction-style prefix is prepended.
        inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
        # Follow whatever device the model lives on instead of assuming CUDA.
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        outputs = model.generate(**inputs, max_length=256)
        # Keep special tokens: the role markers are the only reliable way to
        # tell head, tail and relation apart.
        raw = tokenizer.decode(outputs[0], skip_special_tokens=False)
        decoded = _strip_sequence_tokens(raw)
        triplets = parse_rebel_output(decoded)
        results.append({
            "text": text,
            "decoded_output": decoded,
            "triplets": triplets
        })

    return results


def parse_rebel_output(text):
    """Parse a REBEL-generated sequence into ``(head, relation, tail)`` tuples.

    The expected linearization is
    ``<triplet> head <subj> tail <obj> relation``, where further
    ``<subj> tail <obj> relation`` pairs may follow and reuse the same head.
    Incomplete triplets (any empty part) are dropped.

    If *text* contains none of the markers (e.g. it was decoded with
    ``skip_special_tokens=True``), it falls back to splitting phrases on two
    consecutive spaces; that fallback cannot detect shared heads.

    Args:
        text: Decoded model output.

    Returns:
        A list of ``(head, relation, tail)`` string tuples, in output order.
    """
    text = _strip_sequence_tokens(text)
    if not any(marker in text for marker in _ROLE_MARKERS):
        return _parse_unmarked_output(text)

    # Pad markers with spaces so they always come out as standalone tokens.
    for marker in _ROLE_MARKERS:
        text = text.replace(marker, f" {marker} ")

    triplets = []
    parts = {"head": [], "tail": [], "relation": []}
    field = None  # which part the current plain tokens belong to

    def flush():
        # Emit the triplet collected so far, but only if it is complete.
        if all(parts.values()):
            triplets.append(
                tuple(" ".join(parts[name]) for name in ("head", "relation", "tail"))
            )

    for token in text.split():
        if token == "<triplet>":
            flush()
            parts["head"], parts["tail"], parts["relation"] = [], [], []
            field = "head"
        elif token == "<subj>":
            # A new tail for the current head: emit the previous pair first.
            flush()
            parts["tail"], parts["relation"] = [], []
            field = "tail"
        elif token == "<obj>":
            parts["relation"] = []
            field = "relation"
        elif field is not None:
            parts[field].append(token)

    flush()
    return triplets



# Sample inputs (any number of sentences can be supplied)
sentences = [
    "Barack Obama was born in Hawaii and was elected president of the United States in 2008.",
    "Elon Musk founded SpaceX and Tesla.",
    "Paris is the capital of France.",
    "Apple released the iPhone in 2007.",
]

# Run extraction over every sentence
outputs = extract_triplets_from_texts(sentences)

# Print one report per sentence: source text, decoded output, then the triplets
for item in outputs:
    for label, key in (("🔎 原文:", "text"), ("🧾 解码:", "decoded_output")):
        print(label, item[key])
    print("🧩 三元组:")
    for triple in item["triplets"]:
        print("    ", triple)
    print("-" * 60)

4.运行效果

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值