From 47b50b4827121ec68d9e52a018b631a4a3f09625 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Sat, 27 Jan 2024 02:00:44 +0100 Subject: Fix PerplexityLabs Provider, Improve bypass_cloudflare helper --- g4f/Provider/Aura.py | 5 +- g4f/Provider/Bestim.py | 15 +--- g4f/Provider/Chatgpt4Online.py | 30 +++---- g4f/Provider/PerplexityLabs.py | 2 +- g4f/Provider/helper.py | 8 +- g4f/Provider/selenium/AItianhuSpace.py | 4 +- g4f/errors.py | 4 +- g4f/requests.py | 143 +++++---------------------------- g4f/requests_aiohttp.py | 39 +++++++++ g4f/requests_curl_cffi.py | 77 ++++++++++++++++++ g4f/webdriver.py | 23 +++++- 11 files changed, 186 insertions(+), 164 deletions(-) create mode 100644 g4f/requests_aiohttp.py create mode 100644 g4f/requests_curl_cffi.py (limited to 'g4f') diff --git a/g4f/Provider/Aura.py b/g4f/Provider/Aura.py index 06887425..126c8d0f 100644 --- a/g4f/Provider/Aura.py +++ b/g4f/Provider/Aura.py @@ -1,5 +1,7 @@ from __future__ import annotations + from aiohttp import ClientSession + from ..typing import AsyncResult, Messages from .base_provider import AsyncGeneratorProvider @@ -23,7 +25,7 @@ class Aura(AsyncGeneratorProvider): "Content-Type": "application/json", "Origin": f"{cls.url}", "Referer": f"{cls.url}/", - "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', + "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', "Sec-Ch-Ua-Mobile": "?0", "Sec-Ch-Ua-Platform": '"Linux"', "Sec-Fetch-Dest": "empty", @@ -52,5 +54,6 @@ class Aura(AsyncGeneratorProvider): "temperature": 0.5 } async with session.post(f"{cls.url}/api/chat", json=data, proxy=proxy) as response: + response.raise_for_status() async for chunk in response.content.iter_any(): yield chunk.decode() \ No newline at end of file diff --git a/g4f/Provider/Bestim.py b/g4f/Provider/Bestim.py index be95b48a..323bd713 100644 --- a/g4f/Provider/Bestim.py +++ b/g4f/Provider/Bestim.py @@ -4,7 +4,6 @@ from ..typing import Messages from .base_provider import BaseProvider, CreateResult from ..requests import get_session_from_browser from uuid import uuid4 -import requests class Bestim(BaseProvider): url = "https://chatgpt.bestim.org" @@ -24,18 +23,7 @@ class Bestim(BaseProvider): ) -> CreateResult: session = get_session_from_browser(cls.url, proxy=proxy) headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', 'Accept': 'application/json, text/event-stream', - 'Accept-Language': 'en-US,en;q=0.5', - 'Accept-Encoding': 'gzip, deflate, br', - 'Referer': 'https://chatgpt.bestim.org/chat/', - 'Origin': 'https://chatgpt.bestim.org', - 'Alt-Used': 'chatgpt.bestim.org', - 'Connection': 'keep-alive', - 'Sec-Fetch-Dest': 'empty', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Site': 'same-origin', - 'TE': 'trailers' } data = { "messagesHistory": [{ @@ -47,9 +35,8 @@ class Bestim(BaseProvider): } response = session.post( url="https://chatgpt.bestim.org/chat/send2/", - headers=headers, json=data, - proxies={"https": proxy}, + headers=headers, stream=True ) response.raise_for_status() diff --git a/g4f/Provider/Chatgpt4Online.py b/g4f/Provider/Chatgpt4Online.py index 4135ec9d..e923a8b1 100644 --- a/g4f/Provider/Chatgpt4Online.py +++ b/g4f/Provider/Chatgpt4Online.py @@ -5,6 +5,7 @@ import json from aiohttp import ClientSession from ..typing import Messages, AsyncResult +from ..requests import get_args_from_browser from .base_provider import AsyncGeneratorProvider from .helper import get_random_string @@ -12,8 +13,9 @@ class Chatgpt4Online(AsyncGeneratorProvider): url = "https://chatgpt4online.org" supports_message_history = True supports_gpt_35_turbo = True - working = False # cloudfare block ! + working = False _wpnonce = None + _context_id = None @classmethod async def create_async_generator( @@ -23,23 +25,10 @@ class Chatgpt4Online(AsyncGeneratorProvider): proxy: str = None, **kwargs ) -> AsyncResult: - headers = { - "accept": "*/*", - "accept-language": "en-US", - "content-type": "application/json", - "sec-ch-ua": "\"Not_A Brand\";v=\"8\", \"Chromium\";v=\"120\", \"Google Chrome\";v=\"120\"", - "sec-ch-ua-mobile": "?0", - "sec-ch-ua-platform": "\"Windows\"", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - "referer": "https://chatgpt4online.org/", - "referrer-policy": "strict-origin-when-cross-origin" - } - async with ClientSession(headers=headers) as session: + args = get_args_from_browser(f"{cls.url}/chat/", proxy=proxy) + async with ClientSession(**args) as session: if not cls._wpnonce: - async with session.get(f"{cls.url}/", proxy=proxy) as response: + async with session.get(f"{cls.url}/chat/", proxy=proxy) as response: response.raise_for_status() response = await response.text() result = re.search(r'restNonce":"(.*?)"', response) @@ -47,12 +36,17 @@ class Chatgpt4Online(AsyncGeneratorProvider): cls._wpnonce = result.group(1) else: raise RuntimeError("No nonce found") + result = re.search(r'contextId":(.*?),', response) + if result: + cls._context_id = result.group(1) + else: + raise RuntimeError("No contextId found") data = { "botId":"default", "customId":None, "session":"N/A", "chatId":get_random_string(11), - "contextId":58, + "contextId":cls._context_id, "messages":messages[:-1], "newMessage":messages[-1]["content"], "newImageId":None, diff --git a/g4f/Provider/PerplexityLabs.py b/g4f/Provider/PerplexityLabs.py index a7b98f7c..a82227cd 100644 --- a/g4f/Provider/PerplexityLabs.py +++ b/g4f/Provider/PerplexityLabs.py @@ -76,7 +76,7 @@ class PerplexityLabs(AsyncGeneratorProvider, ProviderModelMixin): 'model': cls.get_model(model), 'messages': messages } - await ws.send_str('42' + json.dumps(['perplexity_playground', message_data])) + await ws.send_str('42' + json.dumps(['perplexity_labs', message_data])) last_message = 0 while True: message = await ws.receive_str() diff --git a/g4f/Provider/helper.py b/g4f/Provider/helper.py index 87fdb158..9788bffd 100644 --- a/g4f/Provider/helper.py +++ b/g4f/Provider/helper.py @@ -22,11 +22,11 @@ except ImportError: has_browser_cookie3 = False from ..typing import Dict, Messages, Cookies, Optional -from ..errors import AiohttpSocksError, MissingRequirementsError +from ..errors import MissingAiohttpSocksError, MissingRequirementsError from .. import debug # Global variable to store cookies -_cookies: Dict[str, Dict[str, str]] = {} +_cookies: Dict[str, Cookies] = {} if has_browser_cookie3 and os.environ.get('DBUS_SESSION_BUS_ADDRESS') == "/dev/null": _LinuxPasswordManager.get_password = lambda a, b: b"secret" @@ -54,7 +54,7 @@ def set_cookies(domain_name: str, cookies: Cookies = None) -> None: else: _cookies.pop(domain_name) -def load_cookies_from_browsers(domain_name: str, raise_requirements_error: bool = True) -> Dict[str, str]: +def load_cookies_from_browsers(domain_name: str, raise_requirements_error: bool = True) -> Cookies: """ Helper function to load cookies from various browsers. @@ -149,5 +149,5 @@ def get_connector(connector: BaseConnector = None, proxy: str = None) -> Optiona from aiohttp_socks import ProxyConnector connector = ProxyConnector.from_url(proxy) except ImportError: - raise AiohttpSocksError('Install "aiohttp_socks" package for proxy support') + raise MissingAiohttpSocksError('Install "aiohttp_socks" package for proxy support') return connector \ No newline at end of file diff --git a/g4f/Provider/selenium/AItianhuSpace.py b/g4f/Provider/selenium/AItianhuSpace.py index 6af8ceaf..4c438e3b 100644 --- a/g4f/Provider/selenium/AItianhuSpace.py +++ b/g4f/Provider/selenium/AItianhuSpace.py @@ -52,9 +52,9 @@ class AItianhuSpace(AbstractProvider): wait.until(EC.visibility_of_element_located((By.ID, "sheet"))) driver.execute_script(f""" document.getElementById('sheet').addEventListener('click', () => {{ - window.open('{url}', '_blank'); + window.open(arguments[0]); }}); - """) + """, url) driver.find_element(By.ID, "sheet").click() time.sleep(10) diff --git a/g4f/errors.py b/g4f/errors.py index 6032001b..15bfafbd 100644 --- a/g4f/errors.py +++ b/g4f/errors.py @@ -31,10 +31,10 @@ class NestAsyncioError(Exception): class ModelNotSupportedError(Exception): pass -class MissingRequirementsError(ImportError): +class MissingRequirementsError(Exception): pass -class AiohttpSocksError(MissingRequirementsError): +class MissingAiohttpSocksError(MissingRequirementsError): pass class MissingAccessToken(Exception): diff --git a/g4f/requests.py b/g4f/requests.py index 46f3b457..275e108b 100644 --- a/g4f/requests.py +++ b/g4f/requests.py @@ -1,130 +1,21 @@ from __future__ import annotations -import json -from functools import partialmethod -from typing import AsyncGenerator from urllib.parse import urlparse try: - from curl_cffi.requests import AsyncSession, Session, Response + from curl_cffi.requests import Session + from .requests_curl_cffi import StreamResponse, StreamSession has_curl_cffi = True except ImportError: Session = type + from .requests_aiohttp import StreamResponse, StreamSession has_curl_cffi = False from .webdriver import WebDriver, WebDriverSession, bypass_cloudflare, get_driver_cookies from .errors import MissingRequirementsError -if not has_curl_cffi: - from aiohttp import ClientSession, ClientResponse, ClientTimeout - from .Provider.helper import get_connector - - class StreamResponse(ClientResponse): - async def iter_lines(self) -> iter[bytes, None]: - async for line in self.content: - yield line.rstrip(b"\r\n") - - async def json(self): - return await super().json(content_type=None) - - class StreamSession(ClientSession): - def __init__(self, headers: dict = {}, timeout: int = None, proxies: dict = {}, impersonate = None, **kwargs): - if impersonate: - headers = { - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'en-US', - 'Connection': 'keep-alive', - 'Sec-Fetch-Dest': 'empty', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Site': 'same-site', - "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36', - 'Accept': '*/*', - 'sec-ch-ua': '"Google Chrome";v="107", "Chromium";v="107", "Not?A_Brand";v="24"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"Windows"', - **headers - } - super().__init__( - **kwargs, - timeout=ClientTimeout(timeout) if timeout else None, - response_class=StreamResponse, - connector=get_connector(kwargs.get("connector"), proxies.get("https")), - headers=headers - ) -else: - class StreamResponse: - """ - A wrapper class for handling asynchronous streaming responses. - - Attributes: - inner (Response): The original Response object. - """ - - def __init__(self, inner: Response) -> None: - """Initialize the StreamResponse with the provided Response object.""" - self.inner: Response = inner - - async def text(self) -> str: - """Asynchronously get the response text.""" - return await self.inner.atext() - - def raise_for_status(self) -> None: - """Raise an HTTPError if one occurred.""" - self.inner.raise_for_status() - - async def json(self, **kwargs) -> dict: - """Asynchronously parse the JSON response content.""" - return json.loads(await self.inner.acontent(), **kwargs) - - async def iter_lines(self) -> AsyncGenerator[bytes, None]: - """Asynchronously iterate over the lines of the response.""" - async for line in self.inner.aiter_lines(): - yield line - - async def iter_content(self) -> AsyncGenerator[bytes, None]: - """Asynchronously iterate over the response content.""" - async for chunk in self.inner.aiter_content(): - yield chunk - - async def __aenter__(self): - """Asynchronously enter the runtime context for the response object.""" - inner: Response = await self.inner - self.inner = inner - self.request = inner.request - self.status_code: int = inner.status_code - self.reason: str = inner.reason - self.ok: bool = inner.ok - self.headers = inner.headers - self.cookies = inner.cookies - return self - - async def __aexit__(self, *args): - """Asynchronously exit the runtime context for the response object.""" - await self.inner.aclose() - - class StreamSession(AsyncSession): - """ - An asynchronous session class for handling HTTP requests with streaming. - - Inherits from AsyncSession. - """ - def request( - self, method: str, url: str, **kwargs - ) -> StreamResponse: - """Create and return a StreamResponse object for the given HTTP request.""" - return StreamResponse(super().request(method, url, stream=True, **kwargs)) - - # Defining HTTP methods as partial methods of the request method. - head = partialmethod(request, "HEAD") - get = partialmethod(request, "GET") - post = partialmethod(request, "POST") - put = partialmethod(request, "PUT") - patch = partialmethod(request, "PATCH") - delete = partialmethod(request, "DELETE") - - -def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session: +def get_args_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> dict: """ Create a Session object using a WebDriver to handle cookies and headers. @@ -137,26 +28,36 @@ def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = Returns: Session: A Session object configured with cookies and headers from the WebDriver. """ - if not has_curl_cffi: - raise MissingRequirementsError('Install "curl_cffi" package') - with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=True) as driver: + with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=False) as driver: bypass_cloudflare(driver, url, timeout) cookies = get_driver_cookies(driver) user_agent = driver.execute_script("return navigator.userAgent") - parse = urlparse(url) - return Session( - cookies=cookies, - headers={ + return { + 'cookies': cookies, + 'headers': { 'accept': '*/*', + "accept-language": "en-US", + "accept-encoding": "gzip, deflate, br", 'authority': parse.netloc, 'origin': f'{parse.scheme}://{parse.netloc}', 'referer': url, + "sec-ch-ua": "\"Google Chrome\";v=\"121\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"121\"", + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": "Windows", 'sec-fetch-dest': 'empty', 'sec-fetch-mode': 'cors', 'sec-fetch-site': 'same-origin', - 'user-agent': user_agent + 'user-agent': user_agent, }, + } + +def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session: + if not has_curl_cffi: + raise MissingRequirementsError('Install "curl_cffi" package') + args = get_args_from_browser(url, webdriver, proxy, timeout) + return Session( + **args, proxies={"https": proxy, "http": proxy}, timeout=timeout, impersonate="chrome110" diff --git a/g4f/requests_aiohttp.py b/g4f/requests_aiohttp.py new file mode 100644 index 00000000..aa097312 --- /dev/null +++ b/g4f/requests_aiohttp.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from aiohttp import ClientSession, ClientResponse, ClientTimeout +from typing import AsyncGenerator, Any + +from .Provider.helper import get_connector + +class StreamResponse(ClientResponse): + async def iter_lines(self) -> AsyncGenerator[bytes, None]: + async for line in self.content: + yield line.rstrip(b"\r\n") + + async def json(self) -> Any: + return await super().json(content_type=None) + +class StreamSession(ClientSession): + def __init__(self, headers: dict = {}, timeout: int = None, proxies: dict = {}, impersonate = None, **kwargs): + if impersonate: + headers = { + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'en-US', + 'Connection': 'keep-alive', + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-site', + "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36', + 'Accept': '*/*', + 'sec-ch-ua': '"Google Chrome";v="107", "Chromium";v="107", "Not?A_Brand";v="24"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Windows"', + **headers + } + super().__init__( + **kwargs, + timeout=ClientTimeout(timeout) if timeout else None, + response_class=StreamResponse, + connector=get_connector(kwargs.get("connector"), proxies.get("https")), + headers=headers + ) \ No newline at end of file diff --git a/g4f/requests_curl_cffi.py b/g4f/requests_curl_cffi.py new file mode 100644 index 00000000..64e41d65 --- /dev/null +++ b/g4f/requests_curl_cffi.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from curl_cffi.requests import AsyncSession, Response +from typing import AsyncGenerator, Any +from functools import partialmethod +import json + +class StreamResponse: + """ + A wrapper class for handling asynchronous streaming responses. + + Attributes: + inner (Response): The original Response object. + """ + + def __init__(self, inner: Response) -> None: + """Initialize the StreamResponse with the provided Response object.""" + self.inner: Response = inner + + async def text(self) -> str: + """Asynchronously get the response text.""" + return await self.inner.atext() + + def raise_for_status(self) -> None: + """Raise an HTTPError if one occurred.""" + self.inner.raise_for_status() + + async def json(self, **kwargs) -> Any: + """Asynchronously parse the JSON response content.""" + return json.loads(await self.inner.acontent(), **kwargs) + + async def iter_lines(self) -> AsyncGenerator[bytes, None]: + """Asynchronously iterate over the lines of the response.""" + async for line in self.inner.aiter_lines(): + yield line + + async def iter_content(self) -> AsyncGenerator[bytes, None]: + """Asynchronously iterate over the response content.""" + async for chunk in self.inner.aiter_content(): + yield chunk + + async def __aenter__(self): + """Asynchronously enter the runtime context for the response object.""" + inner: Response = await self.inner + self.inner = inner + self.request = inner.request + self.status_code: int = inner.status_code + self.reason: str = inner.reason + self.ok: bool = inner.ok + self.headers = inner.headers + self.cookies = inner.cookies + return self + + async def __aexit__(self, *args): + """Asynchronously exit the runtime context for the response object.""" + await self.inner.aclose() + +class StreamSession(AsyncSession): + """ + An asynchronous session class for handling HTTP requests with streaming. + + Inherits from AsyncSession. + """ + + def request( + self, method: str, url: str, **kwargs + ) -> StreamResponse: + """Create and return a StreamResponse object for the given HTTP request.""" + return StreamResponse(super().request(method, url, stream=True, **kwargs)) + + # Defining HTTP methods as partial methods of the request method. + head = partialmethod(request, "HEAD") + get = partialmethod(request, "GET") + post = partialmethod(request, "POST") + put = partialmethod(request, "PUT") + patch = partialmethod(request, "PATCH") + delete = partialmethod(request, "DELETE") diff --git a/g4f/webdriver.py b/g4f/webdriver.py index 66b3956e..ee03ff66 100644 --- a/g4f/webdriver.py +++ b/g4f/webdriver.py @@ -13,7 +13,8 @@ try: except ImportError: from typing import Type as WebDriver has_requirements = False - + +import time from os import path from os import access, R_OK from .errors import MissingRequirementsError @@ -92,7 +93,27 @@ def bypass_cloudflare(driver: WebDriver, url: str, timeout: int) -> None: if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js": if debug.logging: print("Cloudflare protection detected:", url) + + # Open website in a new tab + element = driver.find_element(By.ID, "challenge-body-text") + driver.execute_script(f""" + arguments[0].addEventListener('click', () => {{ + window.open(arguments[1]); + }}); + """, element, url) + element.click() + time.sleep(3) + + # Switch to the new tab and close the old tab + original_window = driver.current_window_handle + for window_handle in driver.window_handles: + if window_handle != original_window: + driver.close() + driver.switch_to.window(window_handle) + break + try: + # Click on the challenge button in the iframe driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe")) WebDriverWait(driver, 5).until( EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input")) -- cgit v1.2.3