diff options
Diffstat (limited to 'scripts/scraping')
-rw-r--r-- | scripts/scraping/ai_scraping.py | 19 | ||||
-rw-r--r-- | scripts/scraping/scraper.py | 3 |
2 files changed, 20 insertions, 2 deletions
diff --git a/scripts/scraping/ai_scraping.py b/scripts/scraping/ai_scraping.py new file mode 100644 index 0000000..b11a12b --- /dev/null +++ b/scripts/scraping/ai_scraping.py @@ -0,0 +1,19 @@ +from google import genai +import json +import requests +import Constants +import AIParams + +client = genai.Client(api_key="AIzaSyAdB7yo0qcnwHeC4T2rRaSXD588JRw94oQ") + +def run_ai_query(url): + req_url = f"https://r.jina.ai/{url}" + res = requests.get(req_url) + markdown_content = res.text + + prompt = AIParams.PROMPT.format(schema=AIParams.RECIPE_SCHEMA, markdown=markdown_content) + + ai_res = client.models.generate_content(model="gemini-2.0-flash", contents=prompt) + cleaned_text = ai_res.text.strip("```").strip("```json") + recipe_json = json.loads(cleaned_text) + return {"success": True, "data": recipe_json} diff --git a/scripts/scraping/scraper.py b/scripts/scraping/scraper.py index 8919d46..0e0b9c8 100644 --- a/scripts/scraping/scraper.py +++ b/scripts/scraping/scraper.py @@ -25,7 +25,7 @@ def extractInstructions(instructions): return returnedInstructions -def scrape(url, user_name): +def scrape(url): try: data = requests.get(url, headers= {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}) html = BeautifulSoup(data.text, 'html.parser') @@ -43,7 +43,6 @@ def scrape(url, user_name): slug = parse.quote(i["name"]).lower() # The recipe - recipe["user"] = user_name recipe["slug"] = slug recipe["title"] = i["name"] recipe["image"] = i["image"][0] |