from azure.search.documents.indexes.models import (
FreshnessScoringFunction,
FreshnessScoringParameters,
ScoringProfile,
SearchableField,
SearchField,
SearchFieldDataType,
SimpleField,
TextWeights,
)
# Replace OpenAIEmbeddings with AzureOpenAIEmbeddings if Azure OpenAI is your provider.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
openai_api_key=openai_api_key, openai_api_version=openai_api_version, model=model
)
embedding_function = embeddings.embed_query
fields = [
SimpleField(
name="id",
type=SearchFieldDataType.String,
key=True,
filterable=True,
),
SearchableField(
name="content",
type=SearchFieldDataType.String,
searchable=True,
),
SearchField(
name="content_vector",
type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
searchable=True,
vector_search_dimensions=len(embedding_function("Text")),
vector_search_profile_name="myHnswProfile",
),
SearchableField(
name="metadata",
type=SearchFieldDataType.String,
searchable=True,
),
# Additional field to store the title
SearchableField(
name="title",
type=SearchFieldDataType.String,
searchable=True,
),
# Additional field for filtering on document source
SimpleField(
name="source",
type=SearchFieldDataType.String,
filterable=True,
),
# Additional data field for last doc update
SimpleField(
name="last_update",
type=SearchFieldDataType.DateTimeOffset,
searchable=True,
filterable=True,
),
]
# Adding a custom scoring profile with a freshness function
sc_name = "scoring_profile"
sc = ScoringProfile(
name=sc_name,
text_weights=TextWeights(weights={"title": 5}),
function_aggregation="sum",
functions=[
FreshnessScoringFunction(
field_name="last_update",
boost=100,
parameters=FreshnessScoringParameters(boosting_duration="P2D"),
interpolation="linear",
)
],
)
index_name = "langchain-vector-demo-custom-scoring-profile"
vector_store: AzureSearch = AzureSearch(
azure_search_endpoint=vector_store_address,
azure_search_key=vector_store_password,
index_name=index_name,
embedding_function=embeddings.embed_query,
fields=fields,
scoring_profiles=[sc],
default_scoring_profile=sc_name,
)