From 2157ccbcdb5d781d389e24db332d2fb78b1159a9 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Sat, 2 Dec 2023 05:40:07 +0100 Subject: Add get_session_from_browser as cloudflare bypass --- g4f/Provider/AItianhuSpace.py | 2 +- g4f/Provider/GptChatly.py | 49 ++++++------------- g4f/Provider/MyShell.py | 2 +- g4f/Provider/PerplexityAi.py | 2 +- g4f/Provider/Pi.py | 77 ++++++++++------------------- g4f/Provider/TalkAi.py | 2 +- g4f/Provider/needs_auth/Bard.py | 2 +- g4f/Provider/needs_auth/OpenaiChat.py | 2 +- g4f/Provider/needs_auth/Poe.py | 2 +- g4f/Provider/needs_auth/Theb.py | 2 +- g4f/Provider/selenium/Phind.py | 3 +- g4f/Provider/webdriver.py | 92 ----------------------------------- g4f/requests.py | 56 +++++++++++++++++++-- g4f/webdriver.py | 82 +++++++++++++++++++++++++++++++ 14 files changed, 185 insertions(+), 190 deletions(-) delete mode 100644 g4f/Provider/webdriver.py create mode 100644 g4f/webdriver.py (limited to 'g4f') diff --git a/g4f/Provider/AItianhuSpace.py b/g4f/Provider/AItianhuSpace.py index 95386e8e..8d9feb2b 100644 --- a/g4f/Provider/AItianhuSpace.py +++ b/g4f/Provider/AItianhuSpace.py @@ -6,7 +6,7 @@ import random from ..typing import CreateResult, Messages from .base_provider import BaseProvider from .helper import format_prompt, get_random_string -from .webdriver import WebDriver, WebDriverSession +from ..webdriver import WebDriver, WebDriverSession from .. import debug class AItianhuSpace(BaseProvider): diff --git a/g4f/Provider/GptChatly.py b/g4f/Provider/GptChatly.py index dcedfe1b..d98c2af4 100644 --- a/g4f/Provider/GptChatly.py +++ b/g4f/Provider/GptChatly.py @@ -1,9 +1,8 @@ from __future__ import annotations -from ..requests import StreamSession +from ..requests import Session, get_session_from_browser from ..typing import Messages from .base_provider import AsyncProvider -from .helper import get_cookies class GptChatly(AsyncProvider): @@ -18,40 +17,20 @@ class GptChatly(AsyncProvider): cls, model: str, messages: Messages, - proxy: str = None, cookies: dict = None, **kwargs) -> str: - - cookies = get_cookies('gptchatly.com') if not cookies else cookies - if not cookies: - raise RuntimeError( - "g4f.provider.GptChatly requires cookies, [refresh https://gptchatly.com on chrome]" - ) - + proxy: str = None, + timeout: int = 120, + session: Session = None, + **kwargs + ) -> str: + if not session: + session = get_session_from_browser(cls.url, proxy=proxy, timeout=timeout) if model.startswith("gpt-4"): chat_url = f"{cls.url}/fetch-gpt4-response" else: - chat_url = f"{cls.url}/fetch-response" - - headers = { - 'authority': 'gptchatly.com', - 'accept': '*/*', - 'accept-language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3', - 'content-type': 'application/json', - 'origin': 'https://gptchatly.com', - 'referer': 'https://gptchatly.com/', - 'sec-ch-ua': '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"macOS"', - 'sec-fetch-dest': 'empty', - 'sec-fetch-mode': 'cors', - 'sec-fetch-site': 'same-origin', - 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36', + chat_url = f"{cls.url}/felch-response" + data = { + "past_conversations": messages } - - async with StreamSession(headers=headers, - proxies={"https": proxy}, cookies=cookies, impersonate='chrome110') as session: - data = { - "past_conversations": messages - } - async with session.post(chat_url, json=data) as response: - response.raise_for_status() - return (await response.json())["chatGPTResponse"] \ No newline at end of file + response = session.post(chat_url, json=data) + response.raise_for_status() + return response.json()["chatGPTResponse"] \ No newline at end of file diff --git a/g4f/Provider/MyShell.py b/g4f/Provider/MyShell.py index 5c9c4fe6..2ee94bb6 100644 --- a/g4f/Provider/MyShell.py +++ b/g4f/Provider/MyShell.py @@ -5,7 +5,7 @@ import time, json from ..typing import CreateResult, Messages from .base_provider import BaseProvider from .helper import format_prompt -from .webdriver import WebDriver, WebDriverSession +from ..webdriver import WebDriver, WebDriverSession class MyShell(BaseProvider): url = "https://app.myshell.ai/chat" diff --git a/g4f/Provider/PerplexityAi.py b/g4f/Provider/PerplexityAi.py index 03353a95..941ca6d4 100644 --- a/g4f/Provider/PerplexityAi.py +++ b/g4f/Provider/PerplexityAi.py @@ -5,7 +5,7 @@ import time from ..typing import CreateResult, Messages from .base_provider import BaseProvider from .helper import format_prompt -from .webdriver import WebDriver, WebDriverSession +from ..webdriver import WebDriver, WebDriverSession class PerplexityAi(BaseProvider): url = "https://www.perplexity.ai" diff --git a/g4f/Provider/Pi.py b/g4f/Provider/Pi.py index 8acf3ec6..385f477b 100644 --- a/g4f/Provider/Pi.py +++ b/g4f/Provider/Pi.py @@ -4,10 +4,10 @@ from ..typing import CreateResult, Messages from .base_provider import BaseProvider, format_prompt import json -from cloudscraper import CloudScraper, session, create_scraper +from ..requests import Session, get_session_from_browser class Pi(BaseProvider): - url = "https://chat-gpt.com" + url = "https://pi.ai/talk" working = True supports_stream = True @@ -17,75 +17,52 @@ class Pi(BaseProvider): model: str, messages: Messages, stream: bool, + session: Session = None, proxy: str = None, - scraper: CloudScraper = None, - conversation: dict = None, + timeout: int = 180, + conversation_id: str = None, **kwargs ) -> CreateResult: - if not scraper: - scraper = cls.get_scraper(proxy) - if not conversation: - conversation = cls.start_conversation(scraper) - answer = cls.ask(scraper, messages, conversation) + if not session: + session = get_session_from_browser(url=cls.url, proxy=proxy, timeout=timeout) + if not conversation_id: + conversation_id = cls.start_conversation(session) + answer = cls.ask(session, messages, conversation_id) for line in answer: if "text" in line: yield line["text"] - - def get_scraper(proxy: str): - return create_scraper( - browser={ - 'browser': 'chrome', - 'platform': 'windows', - 'desktop': True - }, - headers={ - 'Accept': '*/*', - 'Accept-Encoding': 'deflate,gzip,br', - }, - proxies={ - "https": proxy - } - ) - - def start_conversation(scraper: CloudScraper): - response = scraper.post('https://pi.ai/api/chat/start', data="{}", headers={ + + @classmethod + def start_conversation(cls, session: Session) -> str: + response = session.post('https://pi.ai/api/chat/start', data="{}", headers={ 'accept': 'application/json', 'x-api-version': '3' }) if 'Just a moment' in response.text: raise RuntimeError('Error: Cloudflare detected') - return Conversation( - response.json()['conversations'][0]['sid'], - response.cookies - ) + return response.json()['conversations'][0]['sid'] - def get_chat_history(scraper: CloudScraper, conversation: Conversation): + def get_chat_history(session: Session, conversation_id: str): params = { - 'conversation': conversation.sid, + 'conversation': conversation_id, } - response = scraper.get('https://pi.ai/api/chat/history', params=params, cookies=conversation.cookies) + response = session.get('https://pi.ai/api/chat/history', params=params) if 'Just a moment' in response.text: raise RuntimeError('Error: Cloudflare detected') return response.json() - def ask(scraper: CloudScraper, messages: Messages, conversation: Conversation): + def ask(session: Session, messages: Messages, conversation_id: str): json_data = { 'text': format_prompt(messages), - 'conversation': conversation.sid, + 'conversation': conversation_id, 'mode': 'BASE', } - response = scraper.post('https://pi.ai/api/chat', json=json_data, cookies=conversation.cookies, stream=True) - - for line in response.iter_lines(chunk_size=1024, decode_unicode=True): - if 'Just a moment' in line: + response = session.post('https://pi.ai/api/chat', json=json_data, stream=True) + for line in response.iter_lines(): + if b'Just a moment' in line: raise RuntimeError('Error: Cloudflare detected') - if line.startswith('data: {"text":'): - yield json.loads(line.split('data: ')[1]) - if line.startswith('data: {"title":'): - yield json.loads(line.split('data: ')[1]) - -class Conversation(): - def __init__(self, sid: str, cookies): - self.sid = sid - self.cookies = cookies + if line.startswith(b'data: {"text":'): + yield json.loads(line.split(b'data: ')[1]) + elif line.startswith(b'data: {"title":'): + yield json.loads(line.split(b'data: ')[1]) \ No newline at end of file diff --git a/g4f/Provider/TalkAi.py b/g4f/Provider/TalkAi.py index 0edd9f6b..85f56dda 100644 --- a/g4f/Provider/TalkAi.py +++ b/g4f/Provider/TalkAi.py @@ -4,7 +4,7 @@ import time, json, time from ..typing import CreateResult, Messages from .base_provider import BaseProvider -from .webdriver import WebDriver, WebDriverSession +from ..webdriver import WebDriver, WebDriverSession class TalkAi(BaseProvider): url = "https://talkai.info" diff --git a/g4f/Provider/needs_auth/Bard.py b/g4f/Provider/needs_auth/Bard.py index 2c1f6121..877af37e 100644 --- a/g4f/Provider/needs_auth/Bard.py +++ b/g4f/Provider/needs_auth/Bard.py @@ -5,7 +5,7 @@ import time from ...typing import CreateResult, Messages from ..base_provider import BaseProvider from ..helper import format_prompt -from ..webdriver import WebDriver, WebDriverSession +from ...webdriver import WebDriver, WebDriverSession class Bard(BaseProvider): url = "https://bard.google.com" diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py index 8c9dd1e0..af62382a 100644 --- a/g4f/Provider/needs_auth/OpenaiChat.py +++ b/g4f/Provider/needs_auth/OpenaiChat.py @@ -7,7 +7,7 @@ from async_property import async_cached_property from ..base_provider import AsyncGeneratorProvider from ..helper import get_event_loop -from ..webdriver import get_browser +from ...webdriver import get_browser from ...typing import AsyncResult, Messages from ...requests import StreamSession diff --git a/g4f/Provider/needs_auth/Poe.py b/g4f/Provider/needs_auth/Poe.py index 99f6945b..200ded3b 100644 --- a/g4f/Provider/needs_auth/Poe.py +++ b/g4f/Provider/needs_auth/Poe.py @@ -5,7 +5,7 @@ import time from ...typing import CreateResult, Messages from ..base_provider import BaseProvider from ..helper import format_prompt -from ..webdriver import WebDriver, WebDriverSession +from ...webdriver import WebDriver, WebDriverSession models = { "meta-llama/Llama-2-7b-chat-hf": {"name": "Llama-2-7b"}, diff --git a/g4f/Provider/needs_auth/Theb.py b/g4f/Provider/needs_auth/Theb.py index 49ee174b..82eac6e2 100644 --- a/g4f/Provider/needs_auth/Theb.py +++ b/g4f/Provider/needs_auth/Theb.py @@ -5,7 +5,7 @@ import time from ...typing import CreateResult, Messages from ..base_provider import BaseProvider from ..helper import format_prompt -from ..webdriver import WebDriver, WebDriverSession +from ...webdriver import WebDriver, WebDriverSession models = { "theb-ai": "TheB.AI", diff --git a/g4f/Provider/selenium/Phind.py b/g4f/Provider/selenium/Phind.py index b9a37f97..2722307d 100644 --- a/g4f/Provider/selenium/Phind.py +++ b/g4f/Provider/selenium/Phind.py @@ -6,7 +6,7 @@ from urllib.parse import quote from ...typing import CreateResult, Messages from ..base_provider import BaseProvider from ..helper import format_prompt -from ..webdriver import WebDriver, WebDriverSession +from ...webdriver import WebDriver, WebDriverSession class Phind(BaseProvider): url = "https://www.phind.com" @@ -26,7 +26,6 @@ class Phind(BaseProvider): creative_mode: bool = None, **kwargs ) -> CreateResult: - driver.start_session with WebDriverSession(webdriver, "", proxy=proxy) as driver: from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait diff --git a/g4f/Provider/webdriver.py b/g4f/Provider/webdriver.py deleted file mode 100644 index da3b13ed..00000000 --- a/g4f/Provider/webdriver.py +++ /dev/null @@ -1,92 +0,0 @@ -from __future__ import annotations - -import time -from platformdirs import user_config_dir -try: - from selenium.webdriver.remote.webdriver import WebDriver -except ImportError: - class WebDriver(): - pass -try: - from undetected_chromedriver import Chrome, ChromeOptions -except ImportError: - class Chrome(): - def __init__(): - raise RuntimeError('Please install the "undetected_chromedriver" package') - class ChromeOptions(): - def add_argument(): - pass -try: - from pyvirtualdisplay import Display - has_pyvirtualdisplay = True -except ImportError: - has_pyvirtualdisplay = False - -def get_browser( - user_data_dir: str = None, - headless: bool = False, - proxy: str = None, - options: ChromeOptions = None -) -> Chrome: - if user_data_dir == None: - user_data_dir = user_config_dir("g4f") - if proxy: - if not options: - options = ChromeOptions() - options.add_argument(f'--proxy-server={proxy}') - return Chrome(options=options, user_data_dir=user_data_dir, headless=headless) - -class WebDriverSession(): - def __init__( - self, - webdriver: WebDriver = None, - user_data_dir: str = None, - headless: bool = False, - virtual_display: bool = False, - proxy: str = None, - options: ChromeOptions = None - ): - self.webdriver = webdriver - self.user_data_dir = user_data_dir - self.headless = headless - self.virtual_display = None - if has_pyvirtualdisplay and virtual_display: - self.virtual_display = Display(size=(1920,1080)) - self.proxy = proxy - self.options = options - self.default_driver = None - - def reopen( - self, - user_data_dir: str = None, - headless: bool = False, - virtual_display: bool = False - ) -> WebDriver: - if user_data_dir == None: - user_data_dir = self.user_data_dir - if self.default_driver: - self.default_driver.quit() - if not virtual_display and self.virtual_display: - self.virtual_display.stop() - self.virtual_display = None - self.default_driver = get_browser(user_data_dir, headless, self.proxy) - return self.default_driver - - def __enter__(self) -> WebDriver: - if self.webdriver: - return self.webdriver - if self.virtual_display: - self.virtual_display.start() - self.default_driver = get_browser(self.user_data_dir, self.headless, self.proxy, self.options) - return self.default_driver - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.default_driver: - try: - self.default_driver.close() - except: - pass - time.sleep(0.1) - self.default_driver.quit() - if self.virtual_display: - self.virtual_display.stop() \ No newline at end of file diff --git a/g4f/requests.py b/g4f/requests.py index b70789d4..8cf70ac9 100644 --- a/g4f/requests.py +++ b/g4f/requests.py @@ -4,8 +4,12 @@ import json from contextlib import asynccontextmanager from functools import partialmethod from typing import AsyncGenerator - -from curl_cffi.requests import AsyncSession, Response +from urllib.parse import urlparse +from curl_cffi.requests import AsyncSession, Session, Response +from .webdriver import WebDriver, WebDriverSession +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC class StreamResponse: def __init__(self, inner: Response) -> None: @@ -50,4 +54,50 @@ class StreamSession(AsyncSession): post = partialmethod(request, "POST") put = partialmethod(request, "PUT") patch = partialmethod(request, "PATCH") - delete = partialmethod(request, "DELETE") \ No newline at end of file + delete = partialmethod(request, "DELETE") + +def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120): + with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=True) as driver: + driver.get(url) + + # Is cloudflare protection + if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js": + try: + # Click button in iframe + WebDriverWait(driver, 5).until( + EC.presence_of_element_located((By.CSS_SELECTOR, "#turnstile-wrapper iframe")) + ) + driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe")) + WebDriverWait(driver, 5).until( + EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input")) + ) + driver.find_element(By.CSS_SELECTOR, "#challenge-stage input").click() + except: + pass + finally: + driver.switch_to.default_content() + # No cloudflare protection + WebDriverWait(driver, timeout).until( + EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)")) + ) + + cookies = dict([(cookie["name"], cookie["value"]) for cookie in driver.get_cookies()]) + user_agent = driver.execute_script("return navigator.userAgent") + + parse = urlparse(url) + return Session( + cookies=cookies, + headers={ + 'accept': '*/*', + 'authority': parse.netloc, + 'origin': f'{parse.scheme}://{parse.netloc}', + 'referer': url, + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'user-agent': user_agent + }, + proxies={"https": proxy, "http": proxy}, + timeout=timeout, + impersonate="chrome110" + ) diff --git a/g4f/webdriver.py b/g4f/webdriver.py new file mode 100644 index 00000000..288eed0e --- /dev/null +++ b/g4f/webdriver.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import time +from platformdirs import user_config_dir +from selenium.webdriver.remote.webdriver import WebDriver +from undetected_chromedriver import Chrome, ChromeOptions + +try: + from pyvirtualdisplay import Display + has_pyvirtualdisplay = True +except ImportError: + has_pyvirtualdisplay = False + +def get_browser( + user_data_dir: str = None, + headless: bool = False, + proxy: str = None, + options: ChromeOptions = None +) -> WebDriver: + if user_data_dir == None: + user_data_dir = user_config_dir("g4f") + if not options: + options = ChromeOptions() + options.add_argument("window-size=1920,1080"); + if proxy: + options.add_argument(f'--proxy-server={proxy}') + return Chrome(options=options, user_data_dir=user_data_dir, headless=headless) + +class WebDriverSession(): + def __init__( + self, + webdriver: WebDriver = None, + user_data_dir: str = None, + headless: bool = False, + virtual_display: bool = False, + proxy: str = None, + options: ChromeOptions = None + ): + self.webdriver = webdriver + self.user_data_dir = user_data_dir + self.headless = headless + self.virtual_display = None + if has_pyvirtualdisplay and virtual_display: + self.virtual_display = Display(size=(1920,1080)) + self.proxy = proxy + self.options = options + self.default_driver = None + + def reopen( + self, + user_data_dir: str = None, + headless: bool = False, + virtual_display: bool = False + ) -> WebDriver: + if user_data_dir == None: + user_data_dir = self.user_data_dir + if self.default_driver: + self.default_driver.quit() + if not virtual_display and self.virtual_display: + self.virtual_display.stop() + self.virtual_display = None + self.default_driver = get_browser(user_data_dir, headless, self.proxy) + return self.default_driver + + def __enter__(self) -> WebDriver: + if self.webdriver: + return self.webdriver + if self.virtual_display: + self.virtual_display.start() + self.default_driver = get_browser(self.user_data_dir, self.headless, self.proxy, self.options) + return self.default_driver + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.default_driver: + try: + self.default_driver.close() + except: + pass + time.sleep(0.1) + self.default_driver.quit() + if self.virtual_display: + self.virtual_display.stop() \ No newline at end of file -- cgit v1.2.3