это рабочий проект
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2026-04-17 20:24:06 +10:00
parent 343ef43079
commit db646b3ce3

89
main.py
View File

@@ -32,11 +32,6 @@ import work_parser as wp
# Absolute path of the "documents" directory located next to this file.
DOCUMENTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "documents")
# Raw GitHub URL of the http.txt list in the TheSpeedX/SOCKS-List repository.
PROXIES_URL = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt"
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484')
# Proxy-list URL for GPT requests: the GPT_PROXIES_URL environment variable,
# falling back to PROXIES_URL when it is not set.
GPT_PROXIES_URL = os.getenv('GPT_PROXIES_URL', PROXIES_URL)
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Управление жизненным циклом приложения"""
@@ -72,6 +67,8 @@ app.add_middleware(
# expose_headers=["*"],
)
# Raw GitHub URL of the http.txt list in the TheSpeedX/SOCKS-List repository.
PROXIES_URL = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt"
def download_proxies(url):
response = requests.get(url)
if response.status_code == 200:
@@ -209,59 +206,40 @@ def extract_text_from_url(url, timeout=10, verify=True):
return "\n".join(content_text), time_t
# Base URL of the GPT service: read from the GPT_SERVER_URL environment
# variable, otherwise the literal default given in the call.
# NOTE(review): this listing contains two active assignments (the first and
# the last line of this group); executed top to bottom, the last one would
# win. Presumably they are the removed/added pair of a diff — confirm against
# the real main.py.
GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484')
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484')
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://172.17.0.1:8484')
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484')
GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484')
def gpt_response_message(content: str, name_promt: str):
# Sends `content`, wrapped in the prompt template named `name_promt`, to the
# GPT service with an HTTP GET and returns the response body text; returns ""
# once `max_retries` failures have been counted.
#
# NOTE(review): this listing looks like a rendered diff (see the "@@" hunk
# headers) with +/- markers and indentation stripped, so the lines below
# appear to interleave a proxy-based variant and a direct-request variant of
# the same function. Nesting cannot be recovered from here — confirm against
# the real main.py.

# Fill the literal '{content}' placeholder of the template returned by
# wp.get_promt(name_promt) with the caller's text.
contentGPT = wp.get_promt(name_promt).replace('{content}', content)
url = GPT_SERVER_URL
# The whole prompt is sent as the `text` query parameter.
params = {'text': contentGPT}
# Get the proxy list for GPT (if configured)
gpt_proxies_list = download_proxies(GPT_PROXIES_URL)
gpt_proxies_list = get_shuffled_proxies(gpt_proxies_list)
# Retry budget: `retries` counts failed attempts, the loop stops at `max_retries`.
max_retries = 5
retries = 0
while retries < max_retries:
print(f"Попытка подключения к GPT через прокси ({len(gpt_proxies_list)} прокси доступны)")
for proxy in gpt_proxies_list:
try:
# Both URL schemes are routed through the same http:// proxy address.
proxies = {
'http': f'http://{proxy}',
'https': f'http://{proxy}',
}
print(f"Прокси: {proxy}")
# Proxy variant: GET through the current proxy, 60 s timeout.
response = requests.get(url, params=params, timeout=60, proxies=proxies)
if response.status_code == 200:
print(f"✓ GPT-сервис ответил через прокси: {response.status_code}")
# Direct variant: GET without proxies, 60 s timeout.
response = requests.get(url, params=params, timeout=60)
# NOTE(review): whether this return is guarded by the status check above
# cannot be determined from this listing.
return response.text
except requests.exceptions.ConnectTimeout as e:
print(f"Ошибка подключения (timeout): {e}")
# The exception text is commented out of the log message (trailing `#{e}")`).
logger.warning(f"gpt_response_message timeout:") #{e}")
retries += 1
# Back off before the next attempt: 1, 2, 4, 8 s after failures 1-4;
# no sleep after the final failure.
if retries < max_retries:
time.sleep(2 ** (retries - 1))
except requests.exceptions.ConnectionError as e:
print(f"Ошибка соединения: {e}")
logger.warning(f"gpt_response_message connection error: ") #{e}")
retries += 1
if retries < max_retries:
time.sleep(2 ** (retries - 1))
except Exception as ex:
# Proxy variant: report the failing proxy and move on without
# incrementing `retries`.
print(f"Ошибка через прокси {proxy}: {ex}")
continue
# try:
# response = requests.get(url, params=params, timeout=60)
# return response.text
# except requests.exceptions.ConnectTimeout as e:
# print(f"Ошибка подключения (timeout): {e}")
# logger.warning(f"gpt_response_message timeout:") #{e}")
# retries += 1
# if retries < max_retries:
# time.sleep(2 ** (retries - 1))
# except requests.exceptions.ConnectionError as e:
# print(f"Ошибка соединения: {e}")
# logger.warning(f"gpt_response_message connection error: ") #{e}")
# retries += 1
# if retries < max_retries:
# time.sleep(2 ** (retries - 1))
# except Exception as ex:
# print(f"Ошибка при запросе к GPT: {ex}")
# logger.error(f"gpt_response_message: ") #{ex}")
# retries += 1
# if retries < max_retries:
# time.sleep(2 ** (retries - 1))
# Direct variant: any other exception is logged and counted as a failed
# attempt, with the same backoff as above.
print(f"Ошибка при запросе к GPT: {ex}")
logger.error(f"gpt_response_message: ") #{ex}")
retries += 1
if retries < max_retries:
time.sleep(2 ** (retries - 1))
# Retry budget exhausted: log it and return an empty string.
logger.info(f"Привышен лимит запросов {max_retries}")
return ""
@@ -424,25 +402,6 @@ def start_pars_all_istochnik(url:str, promt:str):
# print(f"Начало парсинга: {url} с промтом: {promt}")
task_id = wp.insert_task(status='queued', source_url=url)
# Прокси
# proxies_list = download_proxies(PROXIES_URL)
# proxies_list = get_shuffled_proxies(proxies_list)
# # Загружаем главную страницу через прокси
# main_response = None
# for proxy in proxies_list:
# main_response = fetch_with_proxy(url, proxy=proxy, timeout=30, verify=True)
# if main_response:
# break
# else:
# try:
# main_response = requests.get(url, timeout=30).text
# except requests.RequestException as e:
# print(f"Ошибка при загрузке страницы {url}: {e}")
# wp.update_task(task_id, status='failed', finished_at=datetime.utcnow())
# return set()
# soup = BeautifulSoup(main_response, 'html.parser')
try:
response = requests.get(url)
response.raise_for_status()
@@ -672,6 +631,6 @@ async def get_logs():
lines = file.readlines()[-10:] # последние 10 строк
return {"logs": lines}
# Script entry point: run the "main:app" application with uvicorn on port
# 8001 with reload enabled.
# NOTE(review): the same two lines follow in commented-out form; presumably
# the diff disabled this entry point — confirm against the real main.py.
if __name__ == "__main__":
uvicorn.run("main:app", port=8001, reload=True)
# if __name__ == "__main__":
# uvicorn.run("main:app", port=8001, reload=True)