From dc502a22de5dc9aa7e10db3224ca3eff4e5fe35e Mon Sep 17 00:00:00 2001 From: abc <98614666+xtekky@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:35:18 +0100 Subject: ~ --- g4f/api/__init__.py | 126 ++++++++++++++++++++++++++-------------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/g4f/api/__init__.py b/g4f/api/__init__.py index c52085dc..b19a721b 100644 --- a/g4f/api/__init__.py +++ b/g4f/api/__init__.py @@ -3,10 +3,10 @@ import random import string import time -import requests +# import requests from flask import Flask, request from flask_cors import CORS -from transformers import AutoTokenizer +# from transformers import AutoTokenizer from g4f import ChatCompletion @@ -95,67 +95,67 @@ def chat_completions(): # Get the embedding from huggingface -def get_embedding(input_text, token): - huggingface_token = token - embedding_model = "sentence-transformers/all-mpnet-base-v2" - max_token_length = 500 - - # Load the tokenizer for the 'all-mpnet-base-v2' model - tokenizer = AutoTokenizer.from_pretrained(embedding_model) - # Tokenize the text and split the tokens into chunks of 500 tokens each - tokens = tokenizer.tokenize(input_text) - token_chunks = [ - tokens[i : i + max_token_length] - for i in range(0, len(tokens), max_token_length) - ] - - # Initialize an empty list - embeddings = [] - - # Create embeddings for each chunk - for chunk in token_chunks: - # Convert the chunk tokens back to text - chunk_text = tokenizer.convert_tokens_to_string(chunk) - - # Use the Hugging Face API to get embeddings for the chunk - api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{embedding_model}" - headers = {"Authorization": f"Bearer {huggingface_token}"} - chunk_text = chunk_text.replace("\n", " ") - - # Make a POST request to get the chunk's embedding - response = requests.post( - api_url, - headers=headers, - json={"inputs": chunk_text, "options": {"wait_for_model": True}}, - ) - - # Parse the response and extract the embedding - chunk_embedding = response.json() - # Append the embedding to the list - embeddings.append(chunk_embedding) - - # averaging all the embeddings - # this isn't very effective - # someone a better idea? - num_embeddings = len(embeddings) - average_embedding = [sum(x) / num_embeddings for x in zip(*embeddings)] - embedding = average_embedding - return embedding - - -@app.route("/embeddings", methods=["POST"]) -def embeddings(): - input_text_list = request.get_json().get("input") - input_text = " ".join(map(str, input_text_list)) - token = request.headers.get("Authorization").replace("Bearer ", "") - embedding = get_embedding(input_text, token) - - return { - "data": [{"embedding": embedding, "index": 0, "object": "embedding"}], - "model": "text-embedding-ada-002", - "object": "list", - "usage": {"prompt_tokens": None, "total_tokens": None}, - } +# def get_embedding(input_text, token): +# huggingface_token = token +# embedding_model = "sentence-transformers/all-mpnet-base-v2" +# max_token_length = 500 + +# # Load the tokenizer for the 'all-mpnet-base-v2' model +# tokenizer = AutoTokenizer.from_pretrained(embedding_model) +# # Tokenize the text and split the tokens into chunks of 500 tokens each +# tokens = tokenizer.tokenize(input_text) +# token_chunks = [ +# tokens[i : i + max_token_length] +# for i in range(0, len(tokens), max_token_length) +# ] + +# # Initialize an empty list +# embeddings = [] + +# # Create embeddings for each chunk +# for chunk in token_chunks: +# # Convert the chunk tokens back to text +# chunk_text = tokenizer.convert_tokens_to_string(chunk) + +# # Use the Hugging Face API to get embeddings for the chunk +# api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{embedding_model}" +# headers = {"Authorization": f"Bearer {huggingface_token}"} +# chunk_text = chunk_text.replace("\n", " ") + +# # Make a POST request to get the chunk's embedding +# response = requests.post( +# api_url, +# headers=headers, +# json={"inputs": chunk_text, "options": {"wait_for_model": True}}, +# ) + +# # Parse the response and extract the embedding +# chunk_embedding = response.json() +# # Append the embedding to the list +# embeddings.append(chunk_embedding) + +# # averaging all the embeddings +# # this isn't very effective +# # someone a better idea? +# num_embeddings = len(embeddings) +# average_embedding = [sum(x) / num_embeddings for x in zip(*embeddings)] +# embedding = average_embedding +# return embedding + + +# @app.route("/embeddings", methods=["POST"]) +# def embeddings(): +# input_text_list = request.get_json().get("input") +# input_text = " ".join(map(str, input_text_list)) +# token = request.headers.get("Authorization").replace("Bearer ", "") +# embedding = get_embedding(input_text, token) + +# return { +# "data": [{"embedding": embedding, "index": 0, "object": "embedding"}], +# "model": "text-embedding-ada-002", +# "object": "list", +# "usage": {"prompt_tokens": None, "total_tokens": None}, +# } def run_api(): -- cgit v1.2.3