"""Search the Parcoursup open-data catalogue by fuzzy keyword match.

Running this script downloads the current dataset archive if the extracted
file is not already present in the working directory, loads it into memory,
asks for a free-text query on stdin and prints the matching formations.
"""

import json
import os
import zipfile

import jaro
import requests

# Seconds to wait for the server before giving up instead of hanging forever.
_HTTP_TIMEOUT = 30

# Get the dataset name from the Parcoursup web site.
r = requests.get("https://ressource.parcoursup.fr/data/files.xml", timeout=_HTTP_TIMEOUT)
# Fail loudly on an HTTP error rather than slicing a name out of an error page.
r.raise_for_status()
# NOTE(review): the name is read from a fixed byte range of files.xml, which
# presumably matches the current layout of that file -- confirm, and consider
# parsing the XML instead, since any change upstream silently breaks this.
name = r.content[102:135].decode()
print(f"https://ressource.parcoursup.fr/data/{name}.zip")

# Download the dataset if it doesn't already exist.
# Assumes the archive extracts to a file called exactly `name` -- TODO confirm.
if not os.path.exists(name):
    r = requests.get(
        f"https://ressource.parcoursup.fr/data/{name}.zip",
        timeout=_HTTP_TIMEOUT,
    )
    # Do not write an HTTP error body to disk as if it were the archive.
    r.raise_for_status()
    with open("data.zip", "wb") as file:
        file.write(r.content)
    with zipfile.ZipFile("data.zip", "r") as zip_ref:
        zip_ref.extractall()

# Load database.
print("Loading database ...")
# Binary mode lets the json module detect the UTF-8/16/32 encoding itself
# instead of relying on the platform's default text encoding.
with open(name, "rb") as file:
    database = json.load(file)


def get_formations(prompt: str):
    """Return the formations matching every word of ``prompt``.

    Each whitespace-separated word of ``prompt`` is mapped to the lexicon
    entry (``database["lexique"]["index"]``) with the highest Jaro-Winkler
    similarity; on ties the first entry in lexicon order wins. A formation is
    kept only if the string form of every selected index is found in its
    ``"recW"`` field.

    Args:
        prompt: Free-text query. An empty or whitespace-only prompt selects
            no index, so every formation is returned.

    Returns:
        A list of strings ``"<nm> - <last key of recS>"``, one per matching
        formation, in the iteration order of ``database["formations"]``.

    Raises:
        ValueError: If ``prompt`` contains a word and the lexicon is empty.
    """
    lexicon = database["lexique"]["index"]

    # For every query word, pick the index of the most similar lexicon word.
    indices = [
        max(
            lexicon.items(),
            key=lambda entry: jaro.jaro_winkler_metric(actual_word, entry[0]),
        )[1]
        for actual_word in prompt.split()
    ]

    # "recW" is probed with string keys; convert once, not once per formation.
    wanted = [str(index) for index in indices]

    return [
        f"{formation['nm']} - {list(formation['recS'].keys())[-1]}"
        for formation in database["formations"].values()
        if all(key in formation["recW"] for key in wanted)
    ]


print(get_formations(input("Votre projet > ")))