summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- README.md 24
-rw-r--r-- etc/tool/readme_table.py 14
-rw-r--r-- g4f/Provider/Bing.py 3
-rw-r--r-- g4f/Provider/BingCreateImages.py 1
-rw-r--r-- g4f/Provider/DeepInfra.py 33
-rw-r--r-- g4f/Provider/DeepInfraImage.py 1
-rw-r--r-- g4f/Provider/MetaAIAccount.py 1
-rw-r--r-- g4f/Provider/ReplicateImage.py 1
-rw-r--r-- g4f/Provider/You.py 22
-rw-r--r-- g4f/Provider/needs_auth/Gemini.py 87
-rw-r--r-- g4f/Provider/needs_auth/OpenaiAccount.py 1
-rw-r--r-- g4f/Provider/needs_auth/OpenaiChat.py 1
-rw-r--r-- g4f/gui/server/api.py 14
-rw-r--r-- g4f/image.py 9
14 files changed, 142 insertions, 70 deletions
diff --git a/README.md b/README.md
index 552569a2..66398659 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ As per the survey, here is a list of improvements to come
```sh
docker pull hlohaus789/g4f
-docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/hardir:/app/har_and_cookies hlohaus789/g4f:latest
+docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/har_and_cookies:/app/har_and_cookies hlohaus789/g4f:latest
```
3. **Access the Client:**
@@ -400,17 +400,17 @@ While we wait for gpt-5, here is a list of new models that are at least better t
| openchat_3.5 | Huggingface | 2+ Providers | [huggingface.co](https://huggingface.co/) |
| pi | Inflection | g4f.Provider.Pi | [inflection.ai](https://inflection.ai/) |
-### Image Models
-
-| Label | Provider | Model | Website |
-| ----- | -------- | ----- | ------- |
-| Microsoft Designer | Bing | dall-e | [bing.com](https://www.bing.com/images/create) |
-| OpenAI ChatGPT | Openai | dall-e | [chat.openai.com](https://chat.openai.com) |
-| You.com | You | dall-e | [you.com](https://you.com) |
-| DeepInfraImage | DeepInfra | stability-ai/sdxl | [deepinfra.com](https://deepinfra.com) |
-| ReplicateImage | Replicate | stability-ai/sdxl | [replicate.com](https://replicate.com) |
-| Gemini | Gemini | gemini | [gemini.google.com](https://gemini.google.com) |
-| Meta AI | MetaAI | meta | [meta.ai](https://www.meta.ai) |
+### Image and Vision Models
+
+| Label | Provider | Image Model | Vision Model | Website |
+| ----- | -------- | ----------- | ------------ | ------- |
+| Microsoft Copilot in Bing | `g4f.Provider.Bing` | dall-e| gpt-4-vision | [bing.com](https://bing.com/chat) |
+| DeepInfra | `g4f.Provider.DeepInfra` | stability-ai/sdxl| llava-1.5-7b-hf | [deepinfra.com](https://deepinfra.com) |
+| Gemini | `g4f.Provider.Gemini` | gemini| gemini | [gemini.google.com](https://gemini.google.com) |
+| Meta AI | `g4f.Provider.MetaAI` | meta| ❌ | [meta.ai](https://www.meta.ai) |
+| OpenAI ChatGPT | `g4f.Provider.OpenaiChat` | dall-e| gpt-4-vision | [chat.openai.com](https://chat.openai.com) |
+| Replicate | `g4f.Provider.Replicate` | stability-ai/sdxl| ❌ | [replicate.com](https://replicate.com) |
+| You.com | `g4f.Provider.You` | dall-e| agent | [you.com](https://you.com) |
## 🔗 Powered by gpt4free
diff --git a/etc/tool/readme_table.py b/etc/tool/readme_table.py
index b56e4bca..439b17fa 100644
--- a/etc/tool/readme_table.py
+++ b/etc/tool/readme_table.py
@@ -127,8 +127,8 @@ def print_models():
def print_image_models():
lines = [
- "| Label | Provider | Model | Website |",
- "| ----- | -------- | ----- | ------- |",
+ "| Label | Provider | Image Model | Vision Model | Website |",
+ "| ----- | -------- | ----------- | ------------ | ------- |",
]
from g4f.gui.server.api import Api
for image_model in Api.get_image_models():
@@ -136,13 +136,15 @@ def print_image_models():
netloc = urlparse(provider_url).netloc.replace("www.", "")
website = f"[{netloc}]({provider_url})"
label = image_model["provider"] if image_model["label"] is None else image_model["label"]
- lines.append(f'| {label} | {image_model["provider"]} | {image_model["image_model"]} | {website} |')
+ if image_model["vision_model"] is None:
+ image_model["vision_model"] = "❌"
+ lines.append(f'| {label} | `g4f.Provider.{image_model["provider"]}` | {image_model["image_model"]}| {image_model["vision_model"]} | {website} |')
print("\n".join(lines))
if __name__ == "__main__":
- print_providers()
- print("\n", "-" * 50, "\n")
- print_models()
+ #print_providers()
+ #print("\n", "-" * 50, "\n")
+ #print_models()
print("\n", "-" * 50, "\n")
print_image_models() \ No newline at end of file
diff --git a/g4f/Provider/Bing.py b/g4f/Provider/Bing.py
index 1fe94359..bfd74f8c 100644
--- a/g4f/Provider/Bing.py
+++ b/g4f/Provider/Bing.py
@@ -38,8 +38,9 @@ class Bing(AsyncGeneratorProvider, ProviderModelMixin):
supports_message_history = True
supports_gpt_4 = True
default_model = "Balanced"
+ default_vision_model = "gpt-4-vision"
models = [getattr(Tones, key) for key in Tones.__dict__ if not key.startswith("__")]
-
+
@classmethod
def create_async_generator(
cls,
diff --git a/g4f/Provider/BingCreateImages.py b/g4f/Provider/BingCreateImages.py
index 60ecff07..69bf1e92 100644
--- a/g4f/Provider/BingCreateImages.py
+++ b/g4f/Provider/BingCreateImages.py
@@ -13,6 +13,7 @@ from .bing.create_images import create_images, create_session, get_cookies_from_
class BingCreateImages(AsyncGeneratorProvider, ProviderModelMixin):
label = "Microsoft Designer"
+ parent = "Bing"
url = "https://www.bing.com/images/create"
working = True
needs_auth = True
diff --git a/g4f/Provider/DeepInfra.py b/g4f/Provider/DeepInfra.py
index 971424b7..35ff84a1 100644
--- a/g4f/Provider/DeepInfra.py
+++ b/g4f/Provider/DeepInfra.py
@@ -1,17 +1,22 @@
from __future__ import annotations
import requests
-from ..typing import AsyncResult, Messages
+from ..typing import AsyncResult, Messages, ImageType
+from ..image import to_data_uri
from .needs_auth.Openai import Openai
class DeepInfra(Openai):
label = "DeepInfra"
url = "https://deepinfra.com"
working = True
- needs_auth = False
+ has_auth = True
supports_stream = True
supports_message_history = True
- default_model = 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1'
+ default_model = "meta-llama/Meta-Llama-3-70b-instruct"
+ default_vision_model = "llava-hf/llava-1.5-7b-hf"
+ model_aliases = {
+ 'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1'
+ }
@classmethod
def get_models(cls):
@@ -27,19 +32,12 @@ class DeepInfra(Openai):
model: str,
messages: Messages,
stream: bool,
+ image: ImageType = None,
api_base: str = "https://api.deepinfra.com/v1/openai",
temperature: float = 0.7,
max_tokens: int = 1028,
**kwargs
) -> AsyncResult:
-
- if not '/' in model:
- models = {
- 'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1',
- 'dbrx-instruct': 'databricks/dbrx-instruct',
- }
- model = models.get(model, model)
-
headers = {
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US',
@@ -55,6 +53,19 @@ class DeepInfra(Openai):
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"',
}
+ if image is not None:
+ if not model:
+ model = cls.default_vision_model
+ messages[-1]["content"] = [
+ {
+ "type": "image_url",
+ "image_url": {"url": to_data_uri(image)}
+ },
+ {
+ "type": "text",
+ "text": messages[-1]["content"]
+ }
+ ]
return super().create_async_generator(
model, messages,
stream=stream,
diff --git a/g4f/Provider/DeepInfraImage.py b/g4f/Provider/DeepInfraImage.py
index 8e56e513..46a5c2e2 100644
--- a/g4f/Provider/DeepInfraImage.py
+++ b/g4f/Provider/DeepInfraImage.py
@@ -9,6 +9,7 @@ from ..image import ImageResponse
class DeepInfraImage(AsyncGeneratorProvider, ProviderModelMixin):
url = "https://deepinfra.com"
+ parent = "DeepInfra"
working = True
default_model = 'stability-ai/sdxl'
image_models = [default_model]
diff --git a/g4f/Provider/MetaAIAccount.py b/g4f/Provider/MetaAIAccount.py
index d334393d..369b3f2f 100644
--- a/g4f/Provider/MetaAIAccount.py
+++ b/g4f/Provider/MetaAIAccount.py
@@ -6,6 +6,7 @@ from .MetaAI import MetaAI
class MetaAIAccount(MetaAI):
needs_auth = True
+ parent = "MetaAI"
image_models = ["meta"]
@classmethod
diff --git a/g4f/Provider/ReplicateImage.py b/g4f/Provider/ReplicateImage.py
index 5d001604..cc3943d7 100644
--- a/g4f/Provider/ReplicateImage.py
+++ b/g4f/Provider/ReplicateImage.py
@@ -11,6 +11,7 @@ from ..errors import ResponseError
class ReplicateImage(AsyncGeneratorProvider, ProviderModelMixin):
url = "https://replicate.com"
+ parent = "Replicate"
working = True
default_model = 'stability-ai/sdxl'
default_versions = [
diff --git a/g4f/Provider/You.py b/g4f/Provider/You.py
index 61069503..a9e7834c 100644
--- a/g4f/Provider/You.py
+++ b/g4f/Provider/You.py
@@ -14,13 +14,16 @@ from .you.har_file import get_telemetry_ids
from .. import debug
class You(AsyncGeneratorProvider, ProviderModelMixin):
+ label = "You.com"
url = "https://you.com"
working = True
supports_gpt_35_turbo = True
supports_gpt_4 = True
default_model = "gpt-3.5-turbo"
+ default_vision_model = "agent"
+ image_models = ["dall-e"]
models = [
- "gpt-3.5-turbo",
+ default_model,
"gpt-4",
"gpt-4-turbo",
"claude-instant",
@@ -29,12 +32,12 @@ class You(AsyncGeneratorProvider, ProviderModelMixin):
"claude-3-sonnet",
"gemini-pro",
"zephyr",
- "dall-e",
+ default_vision_model,
+ *image_models
]
model_aliases = {
"claude-v2": "claude-2"
}
- image_models = ["dall-e"]
_cookies = None
_cookies_used = 0
_telemetry_ids = []
@@ -52,7 +55,7 @@ class You(AsyncGeneratorProvider, ProviderModelMixin):
chat_mode: str = "default",
**kwargs,
) -> AsyncResult:
- if image is not None:
+ if image is not None or model == cls.default_vision_model:
chat_mode = "agent"
elif not model or model == cls.default_model:
...
@@ -63,13 +66,18 @@ class You(AsyncGeneratorProvider, ProviderModelMixin):
chat_mode = "custom"
model = cls.get_model(model)
async with StreamSession(
- proxies={"all": proxy},
+ proxy=proxy,
impersonate="chrome",
timeout=(30, timeout)
) as session:
cookies = await cls.get_cookies(session) if chat_mode != "default" else None
-
- upload = json.dumps([await cls.upload_file(session, cookies, to_bytes(image), image_name)]) if image else ""
+ upload = ""
+ if image is not None:
+ upload_file = await cls.upload_file(
+ session, cookies,
+ to_bytes(image), image_name
+ )
+ upload = json.dumps([upload_file])
headers = {
"Accept": "text/event-stream",
"Referer": f"{cls.url}/search?fromSearchBar=true&tbm=youchat",
diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py
index 3917df80..209c2e91 100644
--- a/g4f/Provider/needs_auth/Gemini.py
+++ b/g4f/Provider/needs_auth/Gemini.py
@@ -16,6 +16,7 @@ try:
except ImportError:
pass
+from ... import debug
from ...typing import Messages, Cookies, ImageType, AsyncResult
from ..base_provider import AsyncGeneratorProvider
from ..helper import format_prompt, get_cookies
@@ -54,6 +55,55 @@ class Gemini(AsyncGeneratorProvider):
needs_auth = True
working = True
image_models = ["gemini"]
+ default_vision_model = "gemini"
+ _cookies: Cookies = None
+
+ @classmethod
+ async def nodriver_login(cls) -> Cookies:
+ try:
+ import nodriver as uc
+ except ImportError:
+ return
+ try:
+ from platformdirs import user_config_dir
+ user_data_dir = user_config_dir("g4f-nodriver")
+ except:
+ user_data_dir = None
+ if debug.logging:
+ print(f"Open nodriver with user_dir: {user_data_dir}")
+ browser = await uc.start(user_data_dir=user_data_dir)
+ page = await browser.get(f"{cls.url}/app")
+ await page.select("div.ql-editor.textarea", 240)
+ cookies = {}
+ for c in await page.browser.cookies.get_all():
+ if c.domain.endswith(".google.com"):
+ cookies[c.name] = c.value
+ await page.close()
+ return cookies
+
+ @classmethod
+ async def webdriver_login(cls, proxy: str):
+ driver = None
+ try:
+ driver = get_browser(proxy=proxy)
+ try:
+ driver.get(f"{cls.url}/app")
+ WebDriverWait(driver, 5).until(
+ EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))
+ )
+ except:
+ login_url = os.environ.get("G4F_LOGIN_URL")
+ if login_url:
+ yield f"Please login: [Google Gemini]({login_url})\n\n"
+ WebDriverWait(driver, 240).until(
+ EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))
+ )
+ cls._cookies = get_driver_cookies(driver)
+ except MissingRequirementsError:
+ pass
+ finally:
+ if driver:
+ driver.close()
@classmethod
async def create_async_generator(
@@ -73,47 +123,30 @@ class Gemini(AsyncGeneratorProvider):
if cookies is None:
cookies = {}
cookies["__Secure-1PSID"] = api_key
- cookies = cookies if cookies else get_cookies(".google.com", False, True)
+ cls._cookies = cookies or cls._cookies or get_cookies(".google.com", False, True)
base_connector = get_connector(connector, proxy)
async with ClientSession(
headers=REQUEST_HEADERS,
connector=base_connector
) as session:
- snlm0e = await cls.fetch_snlm0e(session, cookies) if cookies else None
+ snlm0e = await cls.fetch_snlm0e(session, cls._cookies) if cls._cookies else None
if not snlm0e:
- driver = None
- try:
- driver = get_browser(proxy=proxy)
- try:
- driver.get(f"{cls.url}/app")
- WebDriverWait(driver, 5).until(
- EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))
- )
- except:
- login_url = os.environ.get("G4F_LOGIN_URL")
- if login_url:
- yield f"Please login: [Google Gemini]({login_url})\n\n"
- WebDriverWait(driver, 240).until(
- EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))
- )
- cookies = get_driver_cookies(driver)
- except MissingRequirementsError:
- pass
- finally:
- if driver:
- driver.close()
+ cls._cookies = await cls.nodriver_login();
+ if cls._cookies is None:
+ async for chunk in cls.webdriver_login(proxy):
+ yield chunk
if not snlm0e:
- if "__Secure-1PSID" not in cookies:
+ if "__Secure-1PSID" not in cls._cookies:
raise MissingAuthError('Missing "__Secure-1PSID" cookie')
- snlm0e = await cls.fetch_snlm0e(session, cookies)
+ snlm0e = await cls.fetch_snlm0e(session, cls._cookies)
if not snlm0e:
- raise RuntimeError("Invalid auth. SNlM0e not found")
+ raise RuntimeError("Invalid cookies. SNlM0e not found")
image_url = await cls.upload_image(base_connector, to_bytes(image), image_name) if image else None
async with ClientSession(
- cookies=cookies,
+ cookies=cls._cookies,
headers=REQUEST_HEADERS,
connector=base_connector,
) as client:
diff --git a/g4f/Provider/needs_auth/OpenaiAccount.py b/g4f/Provider/needs_auth/OpenaiAccount.py
index 6260d343..16bfff66 100644
--- a/g4f/Provider/needs_auth/OpenaiAccount.py
+++ b/g4f/Provider/needs_auth/OpenaiAccount.py
@@ -4,4 +4,5 @@ from .OpenaiChat import OpenaiChat
class OpenaiAccount(OpenaiChat):
needs_auth = True
+ parent = "OpenaiChat"
image_models = ["dall-e"] \ No newline at end of file
diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py
index 3d6e9858..515230f0 100644
--- a/g4f/Provider/needs_auth/OpenaiChat.py
+++ b/g4f/Provider/needs_auth/OpenaiChat.py
@@ -44,6 +44,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
supports_message_history = True
supports_system_message = True
default_model = None
+ default_vision_model = "gpt-4-vision"
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-gizmo"]
model_aliases = {
"text-davinci-002-render-sha": "gpt-3.5-turbo",
diff --git a/g4f/gui/server/api.py b/g4f/gui/server/api.py
index 435700ea..3d9f6a1c 100644
--- a/g4f/gui/server/api.py
+++ b/g4f/gui/server/api.py
@@ -45,16 +45,20 @@ class Api():
@staticmethod
def get_image_models() -> list[dict]:
image_models = []
- for key, provider in __map__.items():
+ for provider in __providers__:
if hasattr(provider, "image_models"):
if hasattr(provider, "get_models"):
provider.get_models()
+ parent = provider
+ if hasattr(provider, "parent"):
+ parent = __map__[provider.parent]
for model in provider.image_models:
image_models.append({
- "provider": key,
- "url": provider.url,
- "label": provider.label if hasattr(provider, "label") else None,
- "image_model": model
+ "provider": parent.__name__,
+ "url": parent.url,
+ "label": parent.label if hasattr(parent, "label") else None,
+ "image_model": model,
+ "vision_model": parent.default_vision_model if hasattr(parent, "default_vision_model") else None
})
return image_models
diff --git a/g4f/image.py b/g4f/image.py
index ed8af103..270b59ad 100644
--- a/g4f/image.py
+++ b/g4f/image.py
@@ -86,7 +86,7 @@ def is_data_uri_an_image(data_uri: str) -> bool:
if image_format not in ALLOWED_EXTENSIONS and image_format != "svg+xml":
raise ValueError("Invalid image format (from mime file type).")
-def is_accepted_format(binary_data: bytes) -> bool:
+def is_accepted_format(binary_data: bytes) -> str:
"""
Checks if the given binary data represents an image with an accepted format.
@@ -241,6 +241,13 @@ def to_bytes(image: ImageType) -> bytes:
else:
return image.read()
+def to_data_uri(image: ImageType) -> str:
+ if not isinstance(image, str):
+ data = to_bytes(image)
+ data_base64 = base64.b64encode(data).decode()
+ return f"data:{is_accepted_format(data)};base64,{data_base64}"
+ return image
+
class ImageResponse:
def __init__(
self,