/openAI/
{
  "swagger": "2.0",
  "info": {
    "contact": {
      "email": "vora@www.sap.com"
    },
    "description": "This is a vFlow greeter demo.",
    "license": {
      "name": "Apache 2.0",
      "url": "http://www.apache.org/licenses/LICENSE-2.0.html"
    },
    "termsOfService": "http://www.sap.com/vora/terms/",
    "title": "vFlow greeter demo API",
    "version": "1.1.0"
  },
  "schemes": [
    "http",
    "https"
  ],
  "basePath": "/openAI",
  "paths": {
    "/completion": {
      "post": {
        "consumes": [
          "application/json"
        ],
        "description": "",
        "operationId": "completion",
        "parameters": [
          {
            "description": "A message",
            "in": "body",
            "name": "body",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "produces": [
          "application/json"
        ],
        "responses": {
          "200": {
            "description": "",
            "schema": {
              "$ref": "#/definitions/echoReply"
            }
          }
        },
        "summary": ""
      }
    }
  },
  "definitions": {
    "echoReply": {
      "description": "Reply returned by the completion operation."
    }
  }
}
// Register onInput as the callback for the "input" port.
$.setPortCallback("input",onInput);
/**
 * Tells whether a value looks like a byte array.
 *
 * A value qualifies when it is a real, non-empty Array whose first element
 * is a number. Only the first element is inspected.
 *
 * @param {*} data value to inspect
 * @returns {boolean} true for a non-empty array starting with a number
 */
function isByteArray(data) {
    // Array.isArray already rules out null, strings and plain objects.
    if (!Array.isArray(data)) {
        return false;
    }
    if (data.length === 0) {
        return false;
    }
    return typeof data[0] === 'number';
}
/**
 * Delivers a response message, either directly or via the output port.
 *
 * When $.output is null the response is handed to $.sendResponse together
 * with the original message and the error. Otherwise the message is written
 * to $.output, carrying the error (if any) in the
 * "message.response.error" attribute.
 *
 * @param {object} s the incoming message being answered
 * @param {object} m the response message; its Attributes object may be modified
 * @param {*} e error value, or null when there is no error
 */
function sendResponse(s, m, e) {
    if ($.output === null) {
        // No output to forward to: answer the request directly.
        $.sendResponse(s, m, e);
        return;
    }
    // Forward downstream and let the next operator decide what to do.
    if (e !== null) {
        m.Attributes["message.response.error"] = e;
    }
    $.output(m);
}
/**
 * Converts an array of byte values into a string, one character per byte.
 *
 * The conversion runs in fixed-size chunks because passing a very large
 * array to String.fromCharCode.apply in one call can exceed the engine's
 * argument limit and throw a RangeError.
 *
 * NOTE(review): each byte becomes one UTF-16 code unit, so multi-byte UTF-8
 * sequences are not decoded — confirm whether non-ASCII bodies are expected.
 *
 * @param {number[]} bytes array of numeric byte values
 * @returns {string} the resulting string ("" for an empty array)
 */
function _bytesToString(bytes) {
    var CHUNK_SIZE = 8192;
    var parts = [];
    for (var start = 0; start < bytes.length; start += CHUNK_SIZE) {
        parts.push(String.fromCharCode.apply(null, bytes.slice(start, start + CHUNK_SIZE)));
    }
    return parts.join("");
}

/**
 * Handles a message arriving on the input port and sends back a response.
 *
 * The body is converted to a string when it is a byte array, wrapped as
 * {"openAI": <body>}, and sent through sendResponse together with a filtered
 * copy of the incoming attributes.
 *
 * @param {*} ctx callback context (unused here)
 * @param {object} s incoming message; its Body and Attributes are read
 */
function onInput(ctx,s) {
    var msg = {};
    var inbody = s.Body;
    var inattributes = s.Attributes;
    // convert the body into a string if it is bytes
    if (isByteArray(inbody)) {
        inbody = _bytesToString(inbody);
    }
    // prepare the response message
    msg.Attributes = {};
    for (var key in inattributes) {
        // only copy the headers that won't interfere with the receiving operators:
        // every "openapi.header*" attribute is dropped except the request key
        if (key.indexOf("openapi.header") < 0 || key === "openapi.header.x-request-key") {
            msg.Attributes[key] = inattributes[key];
        }
    }
    msg.Body = {"openAI": inbody};
    sendResponse(s, msg, null);
}
import os
import openai
import pandas as pd
import boto3
import tiktoken
import json
from io import StringIO
import numpy as np
# Module-level client setup for OpenAI and S3.
# The quoted values below are placeholders and must be replaced with real
# credentials before this code can reach either service.
# Store the key in the process environment, then read it back for the openai client.
os.environ["OPENAI_API_KEY"] = 'YOUR_OPENAI_API_KEY'
openai.api_key = os.getenv("OPENAI_API_KEY")
# boto3 session built from explicit static credentials.
session = boto3.Session(
aws_access_key_id='<YOUR_S3_ACCESS_KEY>',
aws_secret_access_key='<YOUR_S3_SECRET_ACCESS_KEY>',
)
# S3 client used by get_csv_document to download objects.
bucket_session = session.client('s3')
def get_csv_document( bucket: str, key: str):
    """
    Download a CSV object from S3 and parse it with pandas.

    Args:
        bucket: name of the S3 bucket.
        key: key of the CSV object inside the bucket.

    Returns:
        The result of pd.read_csv applied to the object's "Body" entry.
    """
    s3_object = bucket_session.get_object(Bucket=bucket, Key=key)
    csv_stream = s3_object.get("Body")
    return pd.read_csv(csv_stream)
def vector_similarity(x: list[float], y: list[float]) -> float:
    """
    Compute the dot product of two vectors as their similarity score.

    OpenAI embeddings are normalized to length 1, so for them the dot
    product equals the cosine similarity.
    """
    left = np.array(x)
    right = np.array(y)
    return np.dot(left, right)
def order_document_sections_by_query_similarity(
    query: str,
    contexts: dict[str, np.ndarray],
    min_similarity: float = 0.8,
) -> list[tuple[float, str]]:
    """
    Rank the pre-computed document embeddings by similarity to a query.

    The query is embedded with get_embedding and compared against every entry
    yielded by ``contexts.items()`` using vector_similarity. Entries whose
    similarity is not strictly greater than ``min_similarity`` are dropped.

    Args:
        query: the text to embed and compare.
        contexts: mapping of document index to its embedding.
        min_similarity: exclusive lower bound a document must exceed to be
            kept. Defaults to 0.8.

    Returns:
        A list of ``(similarity, document_index)`` tuples sorted in
        descending order (most similar first).
    """
    query_embedding = get_embedding(query)
    scored_sections = []
    for doc_index, doc_embedding in contexts.items():
        # Compute each similarity once and reuse it for both the filter and the result.
        similarity = vector_similarity(query_embedding, doc_embedding)
        if similarity > min_similarity:
            scored_sections.append((similarity, doc_index))
    scored_sections.sort(reverse=True)
    return scored_sections
def get_embedding(text: str, model:str="text-embedding-ada-002"):
    """
    Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: the input to embed.
        model: name of the embedding model to use.

    Returns:
        The "embedding" entry of the first item in the response's "data" list.
    """
    response = openai.Embedding.create(input=text, model=model)
    first_item = response["data"][0]
    return first_item["embedding"]
def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame) -> str:
    """
    Build a completion prompt from the sections most relevant to a question.

    Sections are taken in the order returned by
    order_document_sections_by_query_similarity. For each one, the matching
    row of ``df`` (looked up by its "AIRPORT" value) is rendered as a
    "Country | City | Airport" line, and lines are added until the running
    token count would exceed the section budget.

    Args:
        question: the user's question.
        context_embeddings: embeddings passed through to the ranking function.
        df: frame with "AIRPORT", "COUNTRY" and "CITY" columns.

    Returns:
        The instruction header, the chosen context lines, and the question
        formatted as a Q/A pair.
    """
    MAX_SECTION_LEN = 500
    SEPARATOR = "\n* "
    # Tokenizer used only to estimate how much of the budget each line uses.
    # NOTE(review): tiktoken maps text-davinci-003 to "p50k_base", not "gpt2" —
    # confirm whether the estimate should use that encoding instead.
    ENCODING = "gpt2"

    ranked_sections = order_document_sections_by_query_similarity(question, context_embeddings)
    tokenizer = tiktoken.get_encoding(ENCODING)
    separator_tokens = len(tokenizer.encode(SEPARATOR))
    airports = df.set_index("AIRPORT")

    context_lines = []
    selected_indexes = []
    tokens_used = 0
    for _, airport in ranked_sections:
        row = airports.loc[airport]
        line = "Country: " + row["COUNTRY"] + " | City: " + row["CITY"] + " | Airport: " + airport
        tokens_used += len(tokenizer.encode(line)) + separator_tokens
        # Stop at the first section that would push the context over the budget.
        if tokens_used > MAX_SECTION_LEN:
            break
        context_lines.append(SEPARATOR + line)
        selected_indexes.append(str(airport))

    # Diagnostic output: how many sections were selected, and which ones.
    print(f"Selected {len(context_lines)} document sections:")
    print("\n".join(selected_indexes))

    header = """Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "I don't know."\n\nContext:\n"""
    return header + "".join(context_lines) + "\n\n Q: " + question + "\n A:"
def answer_query_with_context(
    query: str,
    df: pd.DataFrame,
    document_embeddings: dict[(str, str), np.array],
    show_prompt: bool = False
) -> str:
    """
    Answer a query with a completion grounded in the most relevant sections.

    Args:
        query: the user's question.
        df: frame of documents handed to construct_prompt.
        document_embeddings: embeddings handed to construct_prompt.
        show_prompt: when True, print the generated prompt before sending it.

    Returns:
        Whatever openai.Completion.create returns.
        NOTE(review): the signature is annotated ``-> str`` but the value
        returned is the response object itself, not extracted text — confirm
        what callers expect.
    """
    prompt = construct_prompt(query, document_embeddings, df)
    if show_prompt:
        print(prompt)

    # NOTE(review): the previous comment here described a temperature of 0.0
    # (most predictable answers); the value in effect is 0.7 — confirm which
    # one is intended.
    return openai.Completion.create(
        prompt=prompt,
        temperature=0.7,
        max_tokens=300,
        model="text-davinci-003",
    )
def query_callback(input):
    """
    Handle one incoming request message and emit the completion.

    The message body is parsed as JSON and its "query" entry is answered using
    the airport data and embeddings downloaded from S3. The result is sent on
    the "indexStr" port together with the incoming message's attributes.

    Args:
        input: incoming message; its ``body`` (a JSON string) and
            ``attributes`` are read.
    """
    request = json.loads(input.body)
    bucket_name = '<YOUR_S3_BUCKET>'
    airports_df = get_csv_document(bucket_name, '<YOUR_S3_OBJECT_WITH_DATA_NAME>')
    embeddings_df = get_csv_document(bucket_name, '<YOUR_S3_OBJECT_WITH_EMBEDDINGS_NAME>')
    completion = answer_query_with_context(str(request["query"]), airports_df, embeddings_df)
    # It's important to echo input.body in the outgoing message so that the
    # OpenAPI Servlow operator recognizes the request and replies to the
    # client correctly.
    reply = {"body": input.body, "completion": completion}
    api.send("indexStr", api.Message(reply, input.attributes))
# Register query_callback as the callback for the "inputStr" port.
api.set_port_callback(["inputStr"], query_callback)
https://<YOUR_DI_HOST>/app/pipeline-modeler/openapi/service/<YOUR_OPENAPI_SERVLOW_BASE_PATH>/<YOUR_E...;
{
"query":"What are the airports available in New York?"
}
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
35 | |
25 | |
17 | |
13 | |
8 | |
7 | |
6 | |
6 | |
6 | |
6 |