From 1ae2c3b1b5e2c28142adb21138aa422f7db730e9 Mon Sep 17 00:00:00 2001
From: Rapahel Lauray
Date: Mon, 16 Oct 2023 00:17:27 +0200
Subject: [PATCH 1/2] recuperation data et return formation

---
NOTE(review): free text placed here, between the three-dash line and the
diffstat, is commentary only and is not part of the commit.
  * Pure rename: main5.py becomes datareader.py (similarity index 100%,
    0 insertions, 0 deletions). No content changes in this patch.

 main5.py => datareader.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename main5.py => datareader.py (100%)

diff --git a/main5.py b/datareader.py
similarity index 100%
rename from main5.py
rename to datareader.py
-- 
2.43.4

From 020d4332030750b46766d913d9c0aa0a45346737 Mon Sep 17 00:00:00 2001
From: Rapahel Lauray
Date: Mon, 16 Oct 2023 00:19:51 +0200
Subject: [PATCH 2/2] rectification erreur fichier

---
NOTE(review): free text placed here, between the three-dash line and the
diffstat, is commentary only and is not part of the commit.

What the added code does
  * Requests https://ressource.parcoursup.fr/data/files.xml and decodes bytes
    102..134 of the response body as `name`, then prints the URL of
    "<name>.zip".
  * If no path called `name` exists (relative to the working directory), it
    downloads "<name>.zip", writes it to data.zip and extracts the archive
    with ZipFile.extractall() (no target path given).
  * Opens `name` and parses it as JSON into `database` (previously the fixed
    file "data.json"). Presumably the archive contains a member called
    exactly `name` -- TODO confirm.
  * Adds get_formations(prompt): for every whitespace-separated word of
    `prompt` it keeps the index of the database["lexique"]["index"] entry
    with the highest jaro_winkler_metric score, then returns
    "<nm> - <last key of recS>" for each formation whose "recW" contains
    str(index) for all kept indices.
  * The script now ends with print(get_formations(input("Votre projet > "))).

Behaviour changes compared with the removed code
  * The 0.93 similarity threshold is gone: each word now always yields exactly
    one index (its best match), whereas before a word with no match above the
    threshold was ignored and a word could yield several indices.
  * The "recS" key shown is the last one ([-1]) instead of the second ([1]).
  * Results are printed as one list instead of one line per formation; the
    prompt text changed from "name > " to "Votre projet > ".
  * The unused `to_test` list and the debug print of `possible_words` are
    removed.
  * An empty prompt still selects every formation (all([]) is True; the old
    counter comparison 0 == 0 behaved the same way).

Points to confirm before merging
  * The [102:135] slice hard-codes the byte layout of files.xml; any change
    in that document yields a wrong `name`. Parsing the XML would be safer.
  * Both requests.get() calls pass no timeout and the response status is not
    checked before r.content is used.
  * The network access, the download and input() all run at module top level,
    so importing datareader triggers them.
  * open(name, "r") passes no encoding, so the platform default is used.
  * max(...) raises ValueError if database["lexique"]["index"] is empty.
  * The new file has no trailing newline (see the marker at the end of the
    hunk).
  * Indentation of the Python lines and the position of the three blank
    context lines in the hunk below were inferred to satisfy the 45/25 line
    counts of the hunk header -- verify against blobs a5019b3 and 80c62c5.

 datareader.py | 58 +++++++++++++++++---------------------------------
 1 file changed, 19 insertions(+), 39 deletions(-)

diff --git a/datareader.py b/datareader.py
index a5019b3..80c62c5 100644
--- a/datareader.py
+++ b/datareader.py
@@ -1,45 +1,25 @@
-import jaro
-import json
+import requests, json, jaro, zipfile, os
+
+#Get the name from the web site parcourstup
+r = requests.get("https://ressource.parcoursup.fr/data/files.xml")
+name = r.content[102:135].decode()
+print(f"https://ressource.parcoursup.fr/data/{name}.zip")
+
+# Download the dataset if it doesn't already exist
+if not os.path.exists(name):
+    r = requests.get(f"https://ressource.parcoursup.fr/data/{name}.zip")
+    with open('data.zip', 'wb') as file:
+        file.write(r.content)
+    with zipfile.ZipFile('data.zip', 'r') as zip_ref:
+        zip_ref.extractall()
 
 # Load database
 print("Loading database ...")
-with open("data.json", "r") as file:
+with open(name, "r") as file:
     database = json.loads(file.read())
 
+def get_formations(prompt: str):
+    indices = [max([(jaro.jaro_winkler_metric(actual_word, word), index) for word, index in database["lexique"]["index"].items()], key=lambda x: x[0])[1] for actual_word in prompt.split()]
+    return [f"{formation['nm']} - {list(formation['recS'].keys())[-1]}" for formation in database["formations"].values() if all([str(index) in formation["recW"] for index in indices])]
 
-prompt = input("name > ")
-tokens = prompt.split()
-
-# Finding possible words index
-possible_words = []
-
-for t in tokens:
-    found_words = []
-    for w in database["lexique"]["index"].keys():
-        if jaro.jaro_winkler_metric(w, t) > 0.93:
-            index = database["lexique"]["index"][w]
-            if index not in found_words:
-                found_words.append(index)
-    if found_words:
-        possible_words.append(found_words)
-
-print(possible_words)
-
-# Test every formation
-to_test = []
-test_passed = []
-
-for f in database["formations"].keys():
-    checked_count = 0
-    for ti in database["formations"][f]["recW"].keys():
-        for found_index in possible_words:
-            if int(ti) in found_index:
-                checked_count += 1
-                break
-    if checked_count == len(possible_words):
-        test_passed.append(f)
-
-
-# Print the result
-for f in test_passed:
-    print(database["formations"][f]["nm"], "at :", list(database["formations"][f]["recS"].keys())[1])
+print(get_formations(input("Votre projet > ")))
\ No newline at end of file
-- 
2.43.4