# Constructing Q&A Methods with DistilBERT and Transformers

import collections

import time

from dataclasses import dataclass

import torch

from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering, pipeline

@dataclass
class QAConfig:
    """Configuration for QA settings.

    The fields are read by ``QASystem.get_answer``; the defaults below are
    used when a field is not supplied.
    """

    # Per-chunk length limit forwarded to ``preprocess_context`` as
    # ``max_length``; that method measures length in characters.
    max_sequence_length: int = 512
    # Forwarded to the QA pipeline as ``max_answer_len``.
    max_answer_length: int = 50
    # Number of candidate answers requested from the pipeline per chunk.
    top_k: int = 3
    # Candidates whose score is below this value are discarded.
    threshold: float = 0.5

class QASystem:
    """Q&A system with chunking.

    Wraps a Hugging Face ``question-answering`` pipeline, splits long
    contexts into word-aligned chunks before querying it, and memoizes the
    final answer per instance.

    Attributes:
        gadget: Device handed to the pipeline; ``"cuda"`` when available,
            otherwise ``"cpu"``, unless the caller overrides it.
        tokenizer: ``DistilBertTokenizer`` loaded from ``model_name``.
        mannequin: ``DistilBertForQuestionAnswering`` loaded from
            ``model_name``.
        qa_pipeline: The ``question-answering`` pipeline used by
            ``get_answer``.
        answer_cache: Dict mapping a (query, context, config values) key to
            the result dict previously returned for it.
    """

    def __init__(self, model_name="distilbert-base-uncased-distilled-squad", gadget=None):
        """Load the tokenizer, model and pipeline for ``model_name``.

        Args:
            model_name: Model identifier passed to ``from_pretrained`` and
                to ``pipeline``.
            gadget: Device to run the pipeline on. When falsy, CUDA is used
                if ``torch.cuda.is_available()`` and CPU otherwise.
        """
        self.gadget = gadget or ("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = DistilBertTokenizer.from_pretrained(model_name)
        self.mannequin = DistilBertForQuestionAnswering.from_pretrained(model_name)

        # Pipeline used for all queries; the keyword names must be the ones
        # ``transformers.pipeline`` defines (``model`` / ``device``).
        self.qa_pipeline = pipeline(
            "question-answering",
            model=model_name,
            tokenizer=model_name,
            device=self.gadget,
        )
        # Per-instance memo of answers already computed by get_answer.
        self.answer_cache = {}

    def preprocess_context(self, context, max_length=512):
        """Break up lengthy contexts into chunks under max_length.

        The context is split on whitespace and words are greedily packed
        into chunks joined by single spaces. Length is counted in
        characters, not tokens.

        Args:
            context: Text to split.
            max_length: Maximum number of characters per chunk. A single
                word longer than this is emitted as its own chunk rather
                than being cut.

        Returns:
            List of chunk strings; empty when ``context`` has no words.
        """
        chunks = []
        current_chunk = []
        current_length = 0

        for word in context.split():
            # A separating space is only needed once the chunk has content.
            separator = 1 if current_chunk else 0
            overflows = current_length + separator + len(word) > max_length
            # Never flush an empty chunk: that would emit "" when the very
            # first word is already longer than max_length.
            if overflows and current_chunk:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_length = len(word)
            else:
                current_chunk.append(word)
                current_length += separator + len(word)

        # Add the final chunk if it isn't empty
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks

    def get_answer(self, query, context, config):
        """Get reply with confidence rating.

        Args:
            query: The question text.
            context: The passage to search; it is chunked with
                ``preprocess_context`` before querying the pipeline.
            config: Object exposing ``max_sequence_length``,
                ``max_answer_length``, ``top_k`` and ``threshold``
                (e.g. a ``QAConfig``).

        Returns:
            Dict with keys ``"reply"`` and ``"confidence"``. When no
            candidate reaches ``config.threshold`` the reply is
            ``"No reply discovered"`` with confidence ``0.0``.
        """
        # The config values are part of the key so that changing the
        # threshold/top_k for the same question does not return a stale hit.
        cache_key = (
            query,
            context,
            config.max_sequence_length,
            config.max_answer_length,
            config.top_k,
            config.threshold,
        )
        if cache_key in self.answer_cache:
            return self.answer_cache[cache_key]

        # Collect every candidate at or above the threshold, across chunks.
        candidates = []
        for chunk in self.preprocess_context(context, config.max_sequence_length):
            found = self.qa_pipeline(
                question=query,
                context=chunk,
                max_answer_len=config.max_answer_length,
                top_k=config.top_k,
            )
            # The pipeline hands back a bare dict instead of a list when it
            # has a single answer (e.g. top_k=1); normalize to a list.
            if isinstance(found, dict):
                found = [found]
            candidates.extend(
                candidate for candidate in found
                if candidate["score"] >= config.threshold
            )

        # Return the best candidate, or indicate that nothing qualified.
        if candidates:
            best_answer = max(candidates, key=lambda item: item["score"])
            result = {
                "reply": best_answer["answer"],
                "confidence": best_answer["score"],
            }
        else:
            result = {
                "reply": "No reply discovered",
                "confidence": 0.0,
            }

        self.answer_cache[cache_key] = result
        return result

class ContextManager:
    """Bounded, insertion-ordered store of context passages keyed by ID.

    Attributes:
        contexts: ``OrderedDict`` mapping context ID to context text.
        max_contexts: Capacity; adding a new ID at capacity evicts the
            oldest entry.
    """

    def __init__(self, max_contexts=10):
        """Create an empty store holding at most ``max_contexts`` entries."""
        self.contexts = collections.OrderedDict()
        self.max_contexts = max_contexts

    def add_context(self, context_id, context):
        """Add context with automated cleanup.

        Storing under an ID that is already present replaces its text in
        place. Storing a new ID while the store is full first drops the
        oldest entry.
        """
        # Only evict when the store would actually grow; overwriting an
        # existing ID must not push out an unrelated entry.
        is_new = context_id not in self.contexts
        if is_new and len(self.contexts) >= self.max_contexts:
            self.contexts.popitem(last=False)
        self.contexts[context_id] = context

    def get_context(self, context_id):
        """Get context by ID, or ``None`` when the ID is not stored."""
        return self.contexts.get(context_id)

    def search_relevant_context(self, query, top_k=3):
        """Search for relevant contexts based on relevance score.

        Returns:
            Up to ``top_k`` ``(relevance_score, context_id)`` tuples, sorted
            in descending tuple order (score first, then ID).
        """
        scored = [
            (self._calculate_relevance(query, text), context_id)
            for context_id, text in self.contexts.items()
        ]
        return sorted(scored, reverse=True)[:top_k]

    def _calculate_relevance(self, query, context):
        """Calculate relevance score between query and context.

        The score is the fraction of distinct, lower-cased,
        whitespace-separated query words that also occur in the context.
        A query with no words scores ``0.0``.
        """
        question_words = set(query.lower().split())
        if not question_words:
            # Avoid dividing by zero for an empty / whitespace-only query.
            return 0.0
        context_words = set(context.lower().split())
        return len(question_words & context_words) / len(question_words)

# Example store holding at most ten passages.
context_manager = ContextManager(max_contexts=10)

# Register a sample passage under the ID "python".
context_manager.add_context("python", """

Python is a high-level, interpreted programming language created by Guido van Rossum and launched in 1991.

Python’s design philosophy emphasizes code readability with its notable use of great whitespace.

Python includes a dynamic kind system and automated reminiscence administration and helps a number of programming

paradigms, together with structured, object-oriented, and purposeful programming.

""")

context_manager.add_context(“machine_learning”, “”“

Machine studying is a area of examine that provides computer systems the flexibility to be taught with out being

explicitly programmed. It’s a department of synthetic intelligence based mostly on the concept that techniques

can be taught from information, determine patterns and make selections with minimal human intervention.

Constructing Q&A Methods with DistilBERT and Transformers

Google Cloud gen AI expertise helps healthcare organizations

Amazon Bedrock launches Session Administration APIs for generative AI functions (Preview)

Processing a Listing of CSVs Too Large for Reminiscence with Dask

Leave a Reply Cancel reply

College of Tub releases AI-Powered VR Biking Sport

Google Cloud gen AI expertise helps healthcare organizations

Generative AI-powered recreation design: Accelerating early improvement with Stability AI fashions on Amazon Bedrock

EON Actuality Releases Complete Technical Structure Documentation for Modern EON Exploratory Simulator – EON Actuality

Amazon Bedrock launches Session Administration APIs for generative AI functions (Preview)

More Stories

Leave a Reply Cancel reply

You may have missed