This commit is contained in:
89
main.py
89
main.py
@@ -32,11 +32,6 @@ import work_parser as wp
|
|||||||
|
|
||||||
DOCUMENTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "documents")
|
DOCUMENTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "documents")
|
||||||
|
|
||||||
PROXIES_URL = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt"
|
|
||||||
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484')
|
|
||||||
GPT_PROXIES_URL = os.getenv('GPT_PROXIES_URL', PROXIES_URL)
|
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
"""Управление жизненным циклом приложения"""
|
"""Управление жизненным циклом приложения"""
|
||||||
@@ -72,6 +67,8 @@ app.add_middleware(
|
|||||||
# expose_headers=["*"],
|
# expose_headers=["*"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
PROXIES_URL = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt"
|
||||||
|
|
||||||
def download_proxies(url):
|
def download_proxies(url):
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
@@ -209,59 +206,40 @@ def extract_text_from_url(url, timeout=10, verify=True):
|
|||||||
|
|
||||||
return "\n".join(content_text), time_t
|
return "\n".join(content_text), time_t
|
||||||
|
|
||||||
GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484')
|
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484')
|
||||||
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://172.17.0.1:8484')
|
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://172.17.0.1:8484')
|
||||||
# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484')
|
GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484')
|
||||||
def gpt_response_message(content: str, name_promt: str):
|
def gpt_response_message(content: str, name_promt: str):
|
||||||
contentGPT = wp.get_promt(name_promt).replace('{content}', content)
|
contentGPT = wp.get_promt(name_promt).replace('{content}', content)
|
||||||
|
|
||||||
url = GPT_SERVER_URL
|
url = GPT_SERVER_URL
|
||||||
params = {'text': contentGPT}
|
params = {'text': contentGPT}
|
||||||
|
|
||||||
# Получаем список прокси для GPT (если настроен)
|
|
||||||
gpt_proxies_list = download_proxies(GPT_PROXIES_URL)
|
|
||||||
gpt_proxies_list = get_shuffled_proxies(gpt_proxies_list)
|
|
||||||
|
|
||||||
max_retries = 5
|
max_retries = 5
|
||||||
retries = 0
|
retries = 0
|
||||||
|
|
||||||
while retries < max_retries:
|
while retries < max_retries:
|
||||||
print(f"Попытка подключения к GPT через прокси ({len(gpt_proxies_list)} прокси доступны)")
|
|
||||||
for proxy in gpt_proxies_list:
|
|
||||||
try:
|
try:
|
||||||
proxies = {
|
response = requests.get(url, params=params, timeout=60)
|
||||||
'http': f'http://{proxy}',
|
|
||||||
'https': f'http://{proxy}',
|
|
||||||
}
|
|
||||||
print(f"Прокси: {proxy}")
|
|
||||||
response = requests.get(url, params=params, timeout=60, proxies=proxies)
|
|
||||||
if response.status_code == 200:
|
|
||||||
print(f"✓ GPT-сервис ответил через прокси: {response.status_code}")
|
|
||||||
return response.text
|
return response.text
|
||||||
|
except requests.exceptions.ConnectTimeout as e:
|
||||||
|
print(f"Ошибка подключения (timeout): {e}")
|
||||||
|
logger.warning(f"gpt_response_message timeout:") #{e}")
|
||||||
|
retries += 1
|
||||||
|
if retries < max_retries:
|
||||||
|
time.sleep(2 ** (retries - 1))
|
||||||
|
except requests.exceptions.ConnectionError as e:
|
||||||
|
print(f"Ошибка соединения: {e}")
|
||||||
|
logger.warning(f"gpt_response_message connection error: ") #{e}")
|
||||||
|
retries += 1
|
||||||
|
if retries < max_retries:
|
||||||
|
time.sleep(2 ** (retries - 1))
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print(f"Ошибка через прокси {proxy}: {ex}")
|
print(f"Ошибка при запросе к GPT: {ex}")
|
||||||
continue
|
logger.error(f"gpt_response_message: ") #{ex}")
|
||||||
# try:
|
retries += 1
|
||||||
# response = requests.get(url, params=params, timeout=60)
|
if retries < max_retries:
|
||||||
# return response.text
|
time.sleep(2 ** (retries - 1))
|
||||||
# except requests.exceptions.ConnectTimeout as e:
|
|
||||||
# print(f"Ошибка подключения (timeout): {e}")
|
|
||||||
# logger.warning(f"gpt_response_message timeout:") #{e}")
|
|
||||||
# retries += 1
|
|
||||||
# if retries < max_retries:
|
|
||||||
# time.sleep(2 ** (retries - 1))
|
|
||||||
# except requests.exceptions.ConnectionError as e:
|
|
||||||
# print(f"Ошибка соединения: {e}")
|
|
||||||
# logger.warning(f"gpt_response_message connection error: ") #{e}")
|
|
||||||
# retries += 1
|
|
||||||
# if retries < max_retries:
|
|
||||||
# time.sleep(2 ** (retries - 1))
|
|
||||||
# except Exception as ex:
|
|
||||||
# print(f"Ошибка при запросе к GPT: {ex}")
|
|
||||||
# logger.error(f"gpt_response_message: ") #{ex}")
|
|
||||||
# retries += 1
|
|
||||||
# if retries < max_retries:
|
|
||||||
# time.sleep(2 ** (retries - 1))
|
|
||||||
|
|
||||||
logger.info(f"Привышен лимит запросов {max_retries}")
|
logger.info(f"Привышен лимит запросов {max_retries}")
|
||||||
return ""
|
return ""
|
||||||
@@ -424,25 +402,6 @@ def start_pars_all_istochnik(url:str, promt:str):
|
|||||||
# print(f"Начало парсинга: {url} с промтом: {promt}")
|
# print(f"Начало парсинга: {url} с промтом: {promt}")
|
||||||
task_id = wp.insert_task(status='queued', source_url=url)
|
task_id = wp.insert_task(status='queued', source_url=url)
|
||||||
|
|
||||||
# Прокси
|
|
||||||
# proxies_list = download_proxies(PROXIES_URL)
|
|
||||||
# proxies_list = get_shuffled_proxies(proxies_list)
|
|
||||||
|
|
||||||
# # Загружаем главную страницу через прокси
|
|
||||||
# main_response = None
|
|
||||||
# for proxy in proxies_list:
|
|
||||||
# main_response = fetch_with_proxy(url, proxy=proxy, timeout=30, verify=True)
|
|
||||||
# if main_response:
|
|
||||||
# break
|
|
||||||
# else:
|
|
||||||
# try:
|
|
||||||
# main_response = requests.get(url, timeout=30).text
|
|
||||||
# except requests.RequestException as e:
|
|
||||||
# print(f"Ошибка при загрузке страницы {url}: {e}")
|
|
||||||
# wp.update_task(task_id, status='failed', finished_at=datetime.utcnow())
|
|
||||||
# return set()
|
|
||||||
# soup = BeautifulSoup(main_response, 'html.parser')
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
@@ -672,6 +631,6 @@ async def get_logs():
|
|||||||
lines = file.readlines()[-10:] # последние 10 строк
|
lines = file.readlines()[-10:] # последние 10 строк
|
||||||
return {"logs": lines}
|
return {"logs": lines}
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
uvicorn.run("main:app", port=8001, reload=True)
|
# uvicorn.run("main:app", port=8001, reload=True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user