from typing import List,Dict,Any,Optional,Union import numpy as np import weaviate from weaviate import WeaviateClient from weaviate.collections import Collection import weaviate.classes.config as wc from weaviate.classes.config import Property,DataType from trustrag.modules.retrieval.embedding import EmbeddingGenerator fromweaviate.classes.query import MetadataQuery
:param collection_name:Name of the Weaviate collection :param embedding_generator:An instance of EmbeddingGenerator to generate embeddings :param weaviate_client_paramsictionary of parameters to pass to Weaviate client """ self.collection_name=collection_name self.embedding_generator=embedding_generator
#Initialize Weaviate client with provided parameters self.client=weaviate.connect_to_custom( skip_init_checks=False, **client_params )
#Create collection if it doesn't exist ifnot self._collection_exists(): self._create_collection()
def _collection_exists(self) -> bool:
    """Check whether the configured collection already exists in Weaviate.

    The comparison is case-insensitive: the names reported by the client and
    ``self.collection_name`` are both lowercased before matching.

    :return: True if a collection with a matching name is listed; False if it
        is not listed, or if listing the collections raised an error.
    """
    try:
        existing = self.client.collections.list_all()
        # Lowercase BOTH sides of the comparison. Only the listed names used
        # to be lowercased, so a configured name containing capital letters
        # could never be found.
        known_names = {name.lower() for name in existing}
        return self.collection_name.lower() in known_names
    except Exception as e:
        # Best-effort check: report the problem and treat it as "not found".
        print(f"Error checking collection existence:{e}")
        return False
def _create_collection(self):
    """Create the collection named ``self.collection_name`` in Weaviate.

    Two TEXT properties are declared: ``text`` and ``title``. ``title`` is
    created with ``skip_vectorization=True``.

    :raises Exception: whatever the client raised, re-raised after printing it.
    """
    try:
        # Schema of the properties stored alongside each vector.
        text_property = wc.Property(
            name="text",
            data_type=wc.DataType.TEXT,
        )
        title_property = wc.Property(
            name="title",
            data_type=wc.DataType.TEXT,
            skip_vectorization=True,
        )
        self.client.collections.create(
            name=self.collection_name,
            properties=[text_property, title_property],
        )
    except Exception as e:
        print(f"Error creating collection:{e}")
        raise
def upload_vectors(
    self,
    vectors: Union[np.ndarray, List[List[float]]],
    payload: List[Dict[str, Any]],
    batch_size: int = 100,
):
    """Upload vectors and payload to the Weaviate collection.

    Each payload dictionary is reduced to the two stored properties:
    ``text`` (the payload's ``description`` value, or an empty string when
    absent) and ``title`` (the payload's ``title`` value, or ``Object{idx}``
    where ``idx`` is the item's position inside its batch).

    :param vectors: A numpy array or list of vectors to upload
    :param payload: A list of dictionaries containing the payload for each vector
    :param batch_size: Number of vectors to upload in a single batch
    :raises ValueError: If ``vectors`` and ``payload`` differ in length, or
        ``batch_size`` is not a positive integer
    :raises Exception: Any error raised while uploading a batch is printed
        and re-raised
    """
    if not isinstance(vectors, np.ndarray):
        vectors = np.array(vectors)
    if len(vectors) != len(payload):
        raise ValueError("Vectors and payload must have the same length.")
    if batch_size <= 0:
        # A zero step makes range() fail obscurely and a negative step would
        # silently upload nothing, so reject both up front.
        raise ValueError("batch_size must be a positive integer.")

    # Resolve the collection handle once; the batch context below needs it
    # (it was previously referenced without ever being assigned).
    collection = self.client.collections.get(self.collection_name)

    # Process in batches
    for start in range(0, len(vectors), batch_size):
        batch_vectors = vectors[start:start + batch_size]
        batch_payload = payload[start:start + batch_size]

        try:
            with collection.batch.dynamic() as batch:
                for idx, (properties, vector) in enumerate(
                    zip(batch_payload, batch_vectors)
                ):
                    # 'description' is assumed to be the main text field;
                    # the title falls back to a per-batch default.
                    properties_dict = {
                        "text": properties.get('description', ''),
                        "title": properties.get('title', f'Object{idx}'),
                    }

                    # Add the object with properties and vector
                    batch.add_object(
                        properties=properties_dict,
                        vector=vector,
                    )
        except Exception as e:
            print(f"Error uploading batch:{e}")
            raise
def search(
    self,
    text: str,
    query_filter: Optional[Dict[str, Any]] = None,
    limit: int = 5,
) -> List[Dict[str, Any]]:
    """Search for the closest vectors in the collection based on the input text.

    :param text: The text query to search for
    :param query_filter: Optional filter to apply to the search.
        NOTE(review): the filter is accepted but not applied by this method;
        the dictionary form may need converting before the client can use
        it -- TODO confirm and wire it in.
    :param limit: Number of closest results to return
    :return: List of payloads from the closest vectors; each payload holds
        the stored ``metadata`` entries (if any) plus ``text`` and
        ``_distance``
    """
    # Generate embedding for the query text
    vector = self.embedding_generator.generate_embedding(text)
    # Pass a plain list when the embedding is array-like.
    if hasattr(vector, "tolist"):
        vector = vector.tolist()

    collection = self.client.collections.get(self.collection_name)

    # Run the nearest-neighbour query. This call was missing, leaving
    # ``results`` unassigned. The distance is requested explicitly because
    # it is read from ``obj.metadata.distance`` below.
    results = collection.query.near_vector(
        near_vector=vector,
        limit=limit,
        return_metadata=MetadataQuery(distance=True),
    )

    # Convert results to the same format as QdrantEngine
    payloads = []
    for obj in results.objects:
        # Copy so the returned payload never aliases the result object's own
        # properties; ``or {}`` also covers a stored null metadata value.
        payload = dict(obj.properties.get('metadata') or {})
        payload['text'] = obj.properties.get('text', '')
        payload['_distance'] = obj.metadata.distance
        payloads.append(payload)

    return payloads
def build_filter(self, conditions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a Weaviate filter from a list of conditions.

    Conditions are combined with a single ``And`` operator. A condition is
    skipped when its ``key`` is missing or empty, or when its ``match`` is
    ``None``. Match values are stringified with ``str()``.

    :param conditions: A list of conditions, where each condition is a
        dictionary with:
        - key: The field name to filter on
        - match: The value to match
    :return: A Weaviate filter object (a dictionary with ``operator`` and
        ``operands`` entries)
    """
    operands = []
    # ``or []`` lets callers pass None to mean "no conditions".
    for condition in conditions or []:
        key = condition.get("key")
        match_value = condition.get("match")
        if not key or match_value is None:
            continue
        operands.append({
            "path": [f"metadata.{key}"],
            "operator": "Equal",
            "valueString": str(match_value),
        })

    # The built filter must be handed back to the caller; previously the
    # function fell off the end and returned None.
    return {
        "operator": "And",
        "operands": operands,
    }
# Sample startup records. Each record carries the same six keys; the
# "description" value is the field upload_vectors stores as the object's text.
# Repairs relative to the damaged original: the list is now closed, the two
# description strings that had been split across physical lines are rejoined,
# and the dropped leading "P" of "Point-of-care" / "Personalized" is restored.
documents = [
    {
        "name": "SaferCodes",
        "images": "https://safer.codes/img/brand/logo-icon.png",
        "alt": "SaferCodes Logo QR codes generator system forms for COVID-19",
        "description": "QR codes systems for COVID-19.\nSimple tools for bars,restaurants,offices,and other small proximity businesses.",
        "link": "https://safer.codes",
        "city": "Chicago",
    },
    {
        "name": "Human Practice",
        "images": "https://d1qb2nb5cznatu.cloudfront.net/startups/i/373036-94d1e190f12f2c919c3566ecaecbda68-thumb_jpg.jpg?buster=1396498835",
        "alt": "Human Practice-health care information technology",
        "description": "Point-of-care word of mouth\nPreferral is a mobile platform that channels physicians\u2019 interest in networking with their peers to build referrals within a hospital system.\nHospitals are in a race to employ physicians,even though they lose billions each year($40Bin 2014)on employment.Why...",
        "link": "http://humanpractice.com",
        "city": "Chicago",
    },
    {
        "name": "StyleSeek",
        "images": "https://d1qb2nb5cznatu.cloudfront.net/startups/i/3747-bb0338d641617b54f5234a1d3bfc6fd0-thumb_jpg.jpg?buster=1329158692",
        "alt": "StyleSeek-e-commerce fashion mass customization online shopping",
        "description": "Personalized e-commerce for lifestyle products\nStyleSeek is a personalized e-commerce site for lifestyle products.\nIt works across the style spectrum by enabling users(both men and women)to create and refine their unique StyleDNA.\nStyleSeek also promotes new products via its email newsletter,100%personalized...",
        "link": "http://styleseek.com",
        "city": "Chicago",
    },
    {
        "name": "Scout",
        "images": "https://d1qb2nb5cznatu.cloudfront.net/startups/i/190790-dbe27fe8cda0614d644431f853b64e8f-thumb_jpg.jpg?buster=1389652078",
        "alt": "Scout-security consumer electronics internet of things",
        "description": "Hassle-free Home Security\nScout is a self-installed,wireless home security system.We've created a more open,affordable and modern system than what is available on the market today.With month-to-month contracts and portable devices,Scout is a renter-friendly solution for the other...",
        "link": "http://www.scoutalarm.com",
        "city": "Chicago",
    },
    {
        "name": "Invitation codes",
        "images": "https://invitation.codes/img/inv-brand-fb3.png",
        "alt": "Invitation App-Share referral codes community",
        "description": "The referral community\nInvitation App is a social network where people post their referral codes and collect rewards on autopilot.",
        "link": "https://invitation.codes",
        "city": "Chicago",
    },
    {
        "name": "Hyde Park Angels",
        "images": "https://d1qb2nb5cznatu.cloudfront.net/startups/i/61114-35cd9d9689b70b4dc1d0b3c5f11c26e7-thumb_jpg.jpg?buster=1427395222",
        "alt": "Hyde Park Angels-",
        "description": "Hyde Park Angels is the largest and most active angel group in the Midwest.With a membership of over 100 successful entrepreneurs,executives,and venture capitalists,the organization prides itself on providing critical strategic expertise to entrepreneurs and...",
        "link": "http://hydeparkangels.com",
        "city": "Chicago",
    },
    {
        "name": "GiveForward",
        "images": "https://d1qb2nb5cznatu.cloudfront.net/startups/i/1374-e472ccec267bef9432a459784455c133-thumb_jpg.jpg?buster=1397666635",
        "alt": "GiveForward-health care startups crowdfunding",
        "description": "Crowdfunding for medical and life events\nGiveForward lets anyone to create a free fundraising page for a friend or loved one's uncovered medical bills,memorial fund,adoptions or any other life events in five minutes or less.Millions of families have used GiveForward to raise more than$165Mto let...",
        "link": "http://giveforward.com",
        "city": "Chicago",
    },
    {
        "name": "MentorMob",
        "images": "https://d1qb2nb5cznatu.cloudfront.net/startups/i/19374-3b63fcf38efde624dd79c5cbd96161db-thumb_jpg.jpg?buster=1315734490",
        "alt": "MentorMob-digital media education ventures for good crowdsourcing",
        "description": "Google of Learning,indexed by experts\nProblem:Google doesn't index for learning.Nearly 1 billion Google searches are done for\"how to\"learn various topics every month,from photography to entrepreneurship,forcing learners to waste their time sifting through the millions of results.\nMentorMob is...",
        "link": "http://www.mentormob.com",
        "city": "Chicago",
    },
    {
        "name": "The Boeing Company",
        "images": "https://d1qb2nb5cznatu.cloudfront.net/startups/i/49394-df6be7a1eca80e8e73cc6699fee4f772-thumb_jpg.jpg?buster=1406172049",
        "alt": "The Boeing Company-manufacturing transportation",
        "description": "",
        "link": "http://www.boeing.com",
        "city": "Berlin",
    },
    {
        "name": "NowBoarding\u2708\ufe0f",
        "images": "https://static.above.flights/img/lowcost/envelope_blue.png",
        "alt": "Lowcost Email cheap flights alerts",
        "description": "Invite-only mailing list.\n\nWe search the best weekend and long-haul flight deals\nso you can book before everyone else.",
        "link": "https://nowboarding.club/",
        "city": "Berlin",
    },
    {
        "name": "Rocketmiles",
        "images": "https://d1qb2nb5cznatu.cloudfront.net/startups/i/158571-e53ddffe9fb3ed5e57080db7134117d0-thumb_jpg.jpg?buster=1361371304",
        "alt": "Rocketmiles-e-commerce online travel loyalty programs hotels",
        "description": "Fueling more vacations\nWe enable our customers to travel more,travel better and travel further.20M+consumers stock away miles&points to satisfy their wanderlust.\nFlying around or using credit cards are the only good ways to fill the stockpile today.We've built the third way.Customers...",
        "link": "http://www.Rocketmiles.com",
        "city": "Berlin",
    },
]
{'text':"Fueling more vacations\nWe enable our customers to travel more,travel better and travel further.20M+consumers stock away miles&points to satisfy their wanderlust.\nFlying around or using credit cards are the only good ways to fill the stockpile today.We've built the third way.Customers...",'_distance':0.5216099619865417