diff --git a/.drone.yml b/.drone.yml index 33c3d29..a8e2af3 100644 --- a/.drone.yml +++ b/.drone.yml @@ -43,7 +43,7 @@ steps: - docker stop parser || true - docker rm parser || true - docker pull gitea.allowlgroup.ru/allowlgroup/parser:latest - - docker run -d --name parser --network host -p 8001:8001 -v /opt/parser_data:/app/documents gitea.allowlgroup.ru/allowlgroup/parser:latest + - docker run -d --name parser -p 8001:8001 -v /opt/parser_data:/app/documents gitea.allowlgroup.ru/allowlgroup/parser:latest when: branch: - main diff --git a/main.py b/main.py index 468e333..5baa053 100644 --- a/main.py +++ b/main.py @@ -206,14 +206,16 @@ def extract_text_from_url(url, timeout=10, verify=True): return "\n".join(content_text), time_t -# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484') +GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://45.129.78.228:8484') # GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://172.17.0.1:8484') -GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484') +# GPT_SERVER_URL = os.getenv('GPT_SERVER_URL', 'http://127.0.0.1:8484') def gpt_response_message(content: str, name_promt: str): contentGPT = wp.get_promt(name_promt).replace('{content}', content) url = GPT_SERVER_URL params = {'text': contentGPT} + gpt_proxies_list = download_proxies(GPT_PROXIES_URL) + gpt_proxies_list = get_shuffled_proxies(gpt_proxies_list) max_retries = 5 retries = 0 @@ -399,7 +401,7 @@ def start_pars_two_istochnik(): #Функции start любого источника def start_pars_all_istochnik(url:str, promt:str): - # print(f"Начало парсинга: {url} с промтом: {promt}") + print(f"Начало парсинга: {url} с промтом: {promt}") task_id = wp.insert_task(status='queued', source_url=url) try: @@ -435,10 +437,10 @@ def start_pars_all_istochnik(url:str, promt:str): article = Article(abs_url) article.download() article.parse() - + print("URL:", abs_url) if len(article.text) > 200 and article.publish_date: time_text = article.publish_date.strftime("%Y/%m/%d %H:%M:%S") - print("URL:", abs_url) + # print("Заголовок:", article.title) # print("Дата публикации:", time_text) # print("Текст статьи:", article.text) @@ -486,7 +488,6 @@ class Parserall(BaseModel): @app.post("/add_sources" , summary="Добавление парсинга любого источника") async def add_sources_all_ist(sources: Parserall): - result = wp.add_sources(str(sources.url), sources.promt) return {"status": "success", "message": "Источник добавлен", "data": result}