это рабочий проект
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2026-04-17 20:24:06 +10:00
parent 343ef43079
commit db646b3ce3

89
main.py
View File

@@ -32,11 +32,6 @@ import work_parser as wp
DOCUMENTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "documents") DOCUMENTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "documents")
PROXIES_URL = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt"
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484')
GPT_PROXIES_URL = os.getenv('GPT_PROXIES_URL', PROXIES_URL)
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
"""Управление жизненным циклом приложения""" """Управление жизненным циклом приложения"""
@@ -72,6 +67,8 @@ app.add_middleware(
# expose_headers=["*"], # expose_headers=["*"],
) )
PROXIES_URL = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt"
def download_proxies(url): def download_proxies(url):
response = requests.get(url) response = requests.get(url)
if response.status_code == 200: if response.status_code == 200:
@@ -209,59 +206,40 @@ def extract_text_from_url(url, timeout=10, verify=True):
return "\n".join(content_text), time_t return "\n".join(content_text), time_t
GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484') # GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484')
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://172.17.0.1:8484') # GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://172.17.0.1:8484')
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484') GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484')
def gpt_response_message(content: str, name_promt: str): def gpt_response_message(content: str, name_promt: str):
contentGPT = wp.get_promt(name_promt).replace('{content}', content) contentGPT = wp.get_promt(name_promt).replace('{content}', content)
url = GPT_SERVER_URL url = GPT_SERVER_URL
params = {'text': contentGPT} params = {'text': contentGPT}
# Получаем список прокси для GPT (если настроен)
gpt_proxies_list = download_proxies(GPT_PROXIES_URL)
gpt_proxies_list = get_shuffled_proxies(gpt_proxies_list)
max_retries = 5 max_retries = 5
retries = 0 retries = 0
while retries < max_retries: while retries < max_retries:
print(f"Попытка подключения к GPT через прокси ({len(gpt_proxies_list)} прокси доступны)")
for proxy in gpt_proxies_list:
try: try:
proxies = { response = requests.get(url, params=params, timeout=60)
'http': f'http://{proxy}',
'https': f'http://{proxy}',
}
print(f"Прокси: {proxy}")
response = requests.get(url, params=params, timeout=60, proxies=proxies)
if response.status_code == 200:
print(f"✓ GPT-сервис ответил через прокси: {response.status_code}")
return response.text return response.text
except requests.exceptions.ConnectTimeout as e:
print(f"Ошибка подключения (timeout): {e}")
logger.warning(f"gpt_response_message timeout:") #{e}")
retries += 1
if retries < max_retries:
time.sleep(2 ** (retries - 1))
except requests.exceptions.ConnectionError as e:
print(f"Ошибка соединения: {e}")
logger.warning(f"gpt_response_message connection error: ") #{e}")
retries += 1
if retries < max_retries:
time.sleep(2 ** (retries - 1))
except Exception as ex: except Exception as ex:
print(f"Ошибка через прокси {proxy}: {ex}") print(f"Ошибка при запросе к GPT: {ex}")
continue logger.error(f"gpt_response_message: ") #{ex}")
# try: retries += 1
# response = requests.get(url, params=params, timeout=60) if retries < max_retries:
# return response.text time.sleep(2 ** (retries - 1))
# except requests.exceptions.ConnectTimeout as e:
# print(f"Ошибка подключения (timeout): {e}")
# logger.warning(f"gpt_response_message timeout:") #{e}")
# retries += 1
# if retries < max_retries:
# time.sleep(2 ** (retries - 1))
# except requests.exceptions.ConnectionError as e:
# print(f"Ошибка соединения: {e}")
# logger.warning(f"gpt_response_message connection error: ") #{e}")
# retries += 1
# if retries < max_retries:
# time.sleep(2 ** (retries - 1))
# except Exception as ex:
# print(f"Ошибка при запросе к GPT: {ex}")
# logger.error(f"gpt_response_message: ") #{ex}")
# retries += 1
# if retries < max_retries:
# time.sleep(2 ** (retries - 1))
logger.info(f"Привышен лимит запросов {max_retries}") logger.info(f"Привышен лимит запросов {max_retries}")
return "" return ""
@@ -424,25 +402,6 @@ def start_pars_all_istochnik(url:str, promt:str):
# print(f"Начало парсинга: {url} с промтом: {promt}") # print(f"Начало парсинга: {url} с промтом: {promt}")
task_id = wp.insert_task(status='queued', source_url=url) task_id = wp.insert_task(status='queued', source_url=url)
# Прокси
# proxies_list = download_proxies(PROXIES_URL)
# proxies_list = get_shuffled_proxies(proxies_list)
# # Загружаем главную страницу через прокси
# main_response = None
# for proxy in proxies_list:
# main_response = fetch_with_proxy(url, proxy=proxy, timeout=30, verify=True)
# if main_response:
# break
# else:
# try:
# main_response = requests.get(url, timeout=30).text
# except requests.RequestException as e:
# print(f"Ошибка при загрузке страницы {url}: {e}")
# wp.update_task(task_id, status='failed', finished_at=datetime.utcnow())
# return set()
# soup = BeautifulSoup(main_response, 'html.parser')
try: try:
response = requests.get(url) response = requests.get(url)
response.raise_for_status() response.raise_for_status()
@@ -672,6 +631,6 @@ async def get_logs():
lines = file.readlines()[-10:] # последние 10 строк lines = file.readlines()[-10:] # последние 10 строк
return {"logs": lines} return {"logs": lines}
if __name__ == "__main__": # if __name__ == "__main__":
uvicorn.run("main:app", port=8001, reload=True) # uvicorn.run("main:app", port=8001, reload=True)