From db646b3ce3add3d2e85dad687c69f4a126f8dd26 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 17 Apr 2026 20:24:06 +1000 Subject: [PATCH] =?UTF-8?q?=D1=8D=D1=82=D0=BE=20=D1=80=D0=B0=D0=B1=D0=BE?= =?UTF-8?q?=D1=87=D0=B8=D0=B9=20=D0=BF=D1=80=D0=BE=D0=B5=D0=BA=D1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 95 ++++++++++++++++----------------------------------------- 1 file changed, 27 insertions(+), 68 deletions(-) diff --git a/main.py b/main.py index a4b4cd0..468e333 100644 --- a/main.py +++ b/main.py @@ -32,11 +32,6 @@ import work_parser as wp DOCUMENTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "documents") -PROXIES_URL = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt" -# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484') -GPT_PROXIES_URL = os.getenv('GPT_PROXIES_URL', PROXIES_URL) - - @asynccontextmanager async def lifespan(app: FastAPI): """Управление жизненным циклом приложения""" @@ -72,6 +67,8 @@ app.add_middleware( # expose_headers=["*"], ) +PROXIES_URL = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt" + def download_proxies(url): response = requests.get(url) if response.status_code == 200: @@ -209,59 +206,40 @@ def extract_text_from_url(url, timeout=10, verify=True): return "\n".join(content_text), time_t -GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484') +# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484') # GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://172.17.0.1:8484') -# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484') +GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484') def gpt_response_message(content: str, name_promt: str): contentGPT = wp.get_promt(name_promt).replace('{content}', content) url = GPT_SERVER_URL params = {'text': contentGPT} - # Получаем список прокси для GPT (если настроен) - gpt_proxies_list = download_proxies(GPT_PROXIES_URL) - gpt_proxies_list = get_shuffled_proxies(gpt_proxies_list) - max_retries = 5 retries = 0 while retries < max_retries: - print(f"Попытка подключения к GPT через прокси ({len(gpt_proxies_list)} прокси доступны)") - for proxy in gpt_proxies_list: - try: - proxies = { - 'http': f'http://{proxy}', - 'https': f'http://{proxy}', - } - print(f"Прокси: {proxy}") - response = requests.get(url, params=params, timeout=60, proxies=proxies) - if response.status_code == 200: - print(f"✓ GPT-сервис ответил через прокси: {response.status_code}") - return response.text - except Exception as ex: - print(f"Ошибка через прокси {proxy}: {ex}") - continue - # try: - # response = requests.get(url, params=params, timeout=60) - # return response.text - # except requests.exceptions.ConnectTimeout as e: - # print(f"Ошибка подключения (timeout): {e}") - # logger.warning(f"gpt_response_message timeout:") #{e}") - # retries += 1 - # if retries < max_retries: - # time.sleep(2 ** (retries - 1)) - # except requests.exceptions.ConnectionError as e: - # print(f"Ошибка соединения: {e}") - # logger.warning(f"gpt_response_message connection error: ") #{e}") - # retries += 1 - # if retries < max_retries: - # time.sleep(2 ** (retries - 1)) - # except Exception as ex: - # print(f"Ошибка при запросе к GPT: {ex}") - # logger.error(f"gpt_response_message: ") #{ex}") - # retries += 1 - # if retries < max_retries: - # time.sleep(2 ** (retries - 1)) + try: + response = requests.get(url, params=params, timeout=60) + return response.text + except requests.exceptions.ConnectTimeout as e: + print(f"Ошибка подключения (timeout): {e}") + logger.warning(f"gpt_response_message timeout:") #{e}") + retries += 1 + if retries < max_retries: + time.sleep(2 ** (retries - 1)) + except requests.exceptions.ConnectionError as e: + print(f"Ошибка соединения: {e}") + logger.warning(f"gpt_response_message connection error: ") #{e}") + retries += 1 + if retries < max_retries: + time.sleep(2 ** (retries - 1)) + except Exception as ex: + print(f"Ошибка при запросе к GPT: {ex}") + logger.error(f"gpt_response_message: ") #{ex}") + retries += 1 + if retries < max_retries: + time.sleep(2 ** (retries - 1)) logger.info(f"Привышен лимит запросов {max_retries}") return "" @@ -424,25 +402,6 @@ def start_pars_all_istochnik(url:str, promt:str): # print(f"Начало парсинга: {url} с промтом: {promt}") task_id = wp.insert_task(status='queued', source_url=url) - # Прокси - # proxies_list = download_proxies(PROXIES_URL) - # proxies_list = get_shuffled_proxies(proxies_list) - - # # Загружаем главную страницу через прокси - # main_response = None - # for proxy in proxies_list: - # main_response = fetch_with_proxy(url, proxy=proxy, timeout=30, verify=True) - # if main_response: - # break - # else: - # try: - # main_response = requests.get(url, timeout=30).text - # except requests.RequestException as e: - # print(f"Ошибка при загрузке страницы {url}: {e}") - # wp.update_task(task_id, status='failed', finished_at=datetime.utcnow()) - # return set() - # soup = BeautifulSoup(main_response, 'html.parser') - try: response = requests.get(url) response.raise_for_status() @@ -672,6 +631,6 @@ async def get_logs(): lines = file.readlines()[-10:] # последние 10 строк return {"logs": lines} -if __name__ == "__main__": - uvicorn.run("main:app", port=8001, reload=True) +# if __name__ == "__main__": +# uvicorn.run("main:app", port=8001, reload=True)