From 2e0b9c97af457da5c6afda611d48e59047d4cdb8 Mon Sep 17 00:00:00 2001
From: JJ
Date: Mon, 31 Mar 2025 21:08:41 +0100
Subject: Basic AI functionality

---
 scripts/scraping/ai_scraping.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 scripts/scraping/scraper.py     |  3 +--
 2 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 scripts/scraping/ai_scraping.py

(limited to 'scripts/scraping')

diff --git a/scripts/scraping/ai_scraping.py b/scripts/scraping/ai_scraping.py
new file mode 100644
index 0000000..b11a12b
--- /dev/null
+++ b/scripts/scraping/ai_scraping.py
@@ -0,0 +1,44 @@
+from google import genai
+import json
+import os
+import requests
+import Constants
+import AIParams
+
+# Read the Gemini API key from the environment; credentials must never be
+# committed to the repository. Importing this module raises KeyError if unset.
+client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+
+
+def _strip_code_fence(text):
+    """Return text without a surrounding Markdown ``` / ```json fence."""
+    cleaned = text.strip()
+    if cleaned.startswith("```"):
+        cleaned = cleaned[3:]
+        if cleaned.startswith("json"):
+            cleaned = cleaned[4:]
+    if cleaned.endswith("```"):
+        cleaned = cleaned[:-3]
+    return cleaned.strip()
+
+
+def run_ai_query(url):
+    """Fetch url as Markdown via r.jina.ai and ask Gemini to extract a recipe.
+
+    Returns {"success": True, "data": <parsed JSON>}. Raises
+    requests.RequestException when the fetch fails and json.JSONDecodeError
+    when the model reply is not valid JSON.
+    """
+    req_url = f"https://r.jina.ai/{url}"
+    # A timeout keeps a stalled reader service from hanging the caller forever.
+    res = requests.get(req_url, timeout=30)
+    res.raise_for_status()
+    markdown_content = res.text
+
+    prompt = AIParams.PROMPT.format(schema=AIParams.RECIPE_SCHEMA, markdown=markdown_content)
+
+    ai_res = client.models.generate_content(model="gemini-2.0-flash", contents=prompt)
+    # str.strip("```json") strips a character set, not a prefix, so the fence
+    # is removed explicitly instead.
+    recipe_json = json.loads(_strip_code_fence(ai_res.text))
+    return {"success": True, "data": recipe_json}
diff --git a/scripts/scraping/scraper.py b/scripts/scraping/scraper.py
index 8919d46..0e0b9c8 100644
--- a/scripts/scraping/scraper.py
+++ b/scripts/scraping/scraper.py
@@ -25,7 +25,7 @@ def extractInstructions(instructions):
     return returnedInstructions
 
 
-def scrape(url, user_name):
+def scrape(url):
     try:
         data = requests.get(url, headers= {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"})
         html = BeautifulSoup(data.text, 'html.parser')
@@ -43,7 +43,6 @@ def scrape(url, user_name):
                 slug = parse.quote(i["name"]).lower()
 
                 # The recipe
-                recipe["user"] = user_name
                 recipe["slug"] = slug
                 recipe["title"] = i["name"]
                 recipe["image"] = i["image"][0]
-- 
cgit v1.2.3