From 5756586cde6ed6da147119113fb5a5fd640d5f83 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Sun, 14 Jan 2024 07:45:41 +0100 Subject: Refactor code with AI Add docstrings to many functions Add file upload for text files Add alternative url to FreeChatgpt Add webp to allowed image types --- g4f/webdriver.py | 76 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 23 deletions(-) (limited to 'g4f/webdriver.py') diff --git a/g4f/webdriver.py b/g4f/webdriver.py index da283409..e5ecd8bf 100644 --- a/g4f/webdriver.py +++ b/g4f/webdriver.py @@ -1,5 +1,4 @@ from __future__ import annotations - from platformdirs import user_config_dir from selenium.webdriver.remote.webdriver import WebDriver from undetected_chromedriver import Chrome, ChromeOptions @@ -21,7 +20,16 @@ def get_browser( proxy: str = None, options: ChromeOptions = None ) -> WebDriver: - if user_data_dir == None: + """ + Creates and returns a Chrome WebDriver with the specified options. + + :param user_data_dir: Directory for user data. If None, uses default directory. + :param headless: Boolean indicating whether to run the browser in headless mode. + :param proxy: Proxy settings for the browser. + :param options: ChromeOptions object with specific browser options. + :return: An instance of WebDriver. + """ + if user_data_dir is None: user_data_dir = user_config_dir("g4f") if user_data_dir and debug.logging: print("Open browser with config dir:", user_data_dir) @@ -39,36 +47,45 @@ def get_browser( headless=headless ) -def get_driver_cookies(driver: WebDriver): - return dict([(cookie["name"], cookie["value"]) for cookie in driver.get_cookies()]) +def get_driver_cookies(driver: WebDriver) -> dict: + """ + Retrieves cookies from the given WebDriver. + + :param driver: WebDriver from which to retrieve cookies. + :return: A dictionary of cookies. 
+ """ + return {cookie["name"]: cookie["value"] for cookie in driver.get_cookies()} def bypass_cloudflare(driver: WebDriver, url: str, timeout: int) -> None: - # Open website + """ + Attempts to bypass Cloudflare protection when accessing a URL using the provided WebDriver. + + :param driver: The WebDriver to use. + :param url: URL to access. + :param timeout: Time in seconds to wait for the page to load. + """ driver.get(url) - # Is cloudflare protection if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js": if debug.logging: print("Cloudflare protection detected:", url) try: - # Click button in iframe - WebDriverWait(driver, 5).until( - EC.presence_of_element_located((By.CSS_SELECTOR, "#turnstile-wrapper iframe")) - ) driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe")) WebDriverWait(driver, 5).until( EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input")) - ) - driver.find_element(By.CSS_SELECTOR, "#challenge-stage input").click() - except: - pass + ).click() + except Exception as e: + if debug.logging: + print(f"Error bypassing Cloudflare: {e}") finally: driver.switch_to.default_content() - # No cloudflare protection WebDriverWait(driver, timeout).until( EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)")) ) -class WebDriverSession(): +class WebDriverSession: + """ + Manages a Selenium WebDriver session, including handling of virtual displays and proxies. 
+ """ def __init__( self, webdriver: WebDriver = None, @@ -81,9 +98,7 @@ class WebDriverSession(): self.webdriver = webdriver self.user_data_dir = user_data_dir self.headless = headless - self.virtual_display = None - if has_pyvirtualdisplay and virtual_display: - self.virtual_display = Display(size=(1920, 1080)) + self.virtual_display = Display(size=(1920, 1080)) if has_pyvirtualdisplay and virtual_display else None self.proxy = proxy self.options = options self.default_driver = None @@ -94,8 +109,15 @@ class WebDriverSession(): headless: bool = False, virtual_display: bool = False ) -> WebDriver: - if user_data_dir == None: - user_data_dir = self.user_data_dir + """ + Reopens the WebDriver session with the specified parameters. + + :param user_data_dir: Directory for user data. + :param headless: Boolean indicating whether to run the browser in headless mode. + :param virtual_display: Boolean indicating whether to use a virtual display. + :return: An instance of WebDriver. + """ + user_data_dir = user_data_dir or self.user_data_dir if self.default_driver: self.default_driver.quit() if not virtual_display and self.virtual_display: @@ -105,6 +127,10 @@ class WebDriverSession(): return self.default_driver def __enter__(self) -> WebDriver: + """ + Context management method for entering a session. + :return: An instance of WebDriver. + """ if self.webdriver: return self.webdriver if self.virtual_display: @@ -113,11 +139,15 @@ class WebDriverSession(): return self.default_driver def __exit__(self, exc_type, exc_val, exc_tb): + """ + Context management method for exiting a session. Closes and quits the WebDriver. 
+ """ if self.default_driver: try: self.default_driver.close() - except: - pass + except Exception as e: + if debug.logging: + print(f"Error closing WebDriver: {e}") self.default_driver.quit() if self.virtual_display: self.virtual_display.stop() \ No newline at end of file -- cgit v1.2.3 From 32252def150da94f12d1f3c07f977af6d8931402 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Sun, 14 Jan 2024 15:04:37 +0100 Subject: Change docstrings style to Google Fix typo in latest_version Fix Phind Provider Add unittest workflow and main tests --- g4f/webdriver.py | 75 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 20 deletions(-) (limited to 'g4f/webdriver.py') diff --git a/g4f/webdriver.py b/g4f/webdriver.py index e5ecd8bf..9a83215f 100644 --- a/g4f/webdriver.py +++ b/g4f/webdriver.py @@ -21,13 +21,16 @@ def get_browser( options: ChromeOptions = None ) -> WebDriver: """ - Creates and returns a Chrome WebDriver with the specified options. + Creates and returns a Chrome WebDriver with specified options. - :param user_data_dir: Directory for user data. If None, uses default directory. - :param headless: Boolean indicating whether to run the browser in headless mode. - :param proxy: Proxy settings for the browser. - :param options: ChromeOptions object with specific browser options. - :return: An instance of WebDriver. + Args: + user_data_dir (str, optional): Directory for user data. If None, uses default directory. + headless (bool, optional): Whether to run the browser in headless mode. Defaults to False. + proxy (str, optional): Proxy settings for the browser. Defaults to None. + options (ChromeOptions, optional): ChromeOptions object with specific browser options. Defaults to None. + + Returns: + WebDriver: An instance of WebDriver configured with the specified options. 
""" if user_data_dir is None: user_data_dir = user_config_dir("g4f") @@ -49,10 +52,13 @@ def get_browser( def get_driver_cookies(driver: WebDriver) -> dict: """ - Retrieves cookies from the given WebDriver. + Retrieves cookies from the specified WebDriver. + + Args: + driver (WebDriver): The WebDriver instance from which to retrieve cookies. - :param driver: WebDriver from which to retrieve cookies. - :return: A dictionary of cookies. + Returns: + dict: A dictionary containing cookies with their names as keys and values as cookie values. """ return {cookie["name"]: cookie["value"] for cookie in driver.get_cookies()} @@ -60,9 +66,13 @@ def bypass_cloudflare(driver: WebDriver, url: str, timeout: int) -> None: """ Attempts to bypass Cloudflare protection when accessing a URL using the provided WebDriver. - :param driver: The WebDriver to use. - :param url: URL to access. - :param timeout: Time in seconds to wait for the page to load. + Args: + driver (WebDriver): The WebDriver to use for accessing the URL. + url (str): The URL to access. + timeout (int): Time in seconds to wait for the page to load. + + Raises: + Exception: If there is an error while bypassing Cloudflare or loading the page. """ driver.get(url) if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js": @@ -86,6 +96,7 @@ class WebDriverSession: """ Manages a Selenium WebDriver session, including handling of virtual displays and proxies. """ + def __init__( self, webdriver: WebDriver = None, @@ -95,6 +106,17 @@ class WebDriverSession: proxy: str = None, options: ChromeOptions = None ): + """ + Initializes a new instance of the WebDriverSession. + + Args: + webdriver (WebDriver, optional): A WebDriver instance for the session. Defaults to None. + user_data_dir (str, optional): Directory for user data. Defaults to None. + headless (bool, optional): Whether to run the browser in headless mode. Defaults to False. + virtual_display (bool, optional): Whether to use a virtual display. 
Defaults to False. + proxy (str, optional): Proxy settings for the browser. Defaults to None. + options (ChromeOptions, optional): ChromeOptions for the browser. Defaults to None. + """ self.webdriver = webdriver self.user_data_dir = user_data_dir self.headless = headless @@ -110,14 +132,17 @@ class WebDriverSession: virtual_display: bool = False ) -> WebDriver: """ - Reopens the WebDriver session with the specified parameters. + Reopens the WebDriver session with new settings. + + Args: + user_data_dir (str, optional): Directory for user data. Defaults to current value. + headless (bool, optional): Whether to run the browser in headless mode. Defaults to current value. + virtual_display (bool, optional): Whether to use a virtual display. Defaults to current value. - :param user_data_dir: Directory for user data. - :param headless: Boolean indicating whether to run the browser in headless mode. - :param virtual_display: Boolean indicating whether to use a virtual display. - :return: An instance of WebDriver. + Returns: + WebDriver: The reopened WebDriver instance. """ - user_data_dir = user_data_dir or self.user_data_dir + user_data_dir = user_data_dir or self.user_data_dir if self.default_driver: self.default_driver.quit() if not virtual_display and self.virtual_display: @@ -128,8 +153,10 @@ class WebDriverSession: def __enter__(self) -> WebDriver: """ - Context management method for entering a session. - :return: An instance of WebDriver. + Context management method for entering a session. Initializes and returns a WebDriver instance. + + Returns: + WebDriver: An instance of WebDriver for this session. """ if self.webdriver: return self.webdriver @@ -141,6 +168,14 @@ class WebDriverSession: def __exit__(self, exc_type, exc_val, exc_tb): """ Context management method for exiting a session. Closes and quits the WebDriver. + + Args: + exc_type: Exception type. + exc_val: Exception value. + exc_tb: Exception traceback. 
+ + Note: + Closes the WebDriver and stops the virtual display if used. """ if self.default_driver: try: -- cgit v1.2.3