diff --git a/datareader.py b/datareader.py
new file mode 100644
index 0000000..80c62c5
--- /dev/null
+++ b/datareader.py
@@ -0,0 +1,82 @@
+"""Search the Parcoursup formations dataset by fuzzy keyword match.
+
+Running this file downloads the dataset when it is not already on disk,
+loads it, asks for a free-text query and prints the matching formations.
+"""
+import json
+import os
+import zipfile
+
+import jaro
+import requests
+
+BASE_URL = "https://ressource.parcoursup.fr/data"
+# Seconds before giving up; requests waits indefinitely without a timeout.
+HTTP_TIMEOUT = 30
+
+# Get the dataset name from the Parcoursup website.
+r = requests.get(f"{BASE_URL}/files.xml", timeout=HTTP_TIMEOUT)
+r.raise_for_status()
+# NOTE(review): the name is sliced out at fixed byte offsets, so this breaks
+# as soon as the layout of files.xml changes -- confirm the schema and parse
+# the XML instead.
+name = r.content[102:135].decode()
+print(f"{BASE_URL}/{name}.zip")
+
+# Download the dataset if it doesn't already exist.
+if not os.path.exists(name):
+    r = requests.get(f"{BASE_URL}/{name}.zip", timeout=HTTP_TIMEOUT)
+    # Stop here on an HTTP error instead of saving the error page as data.zip.
+    r.raise_for_status()
+    with open("data.zip", "wb") as file:
+        file.write(r.content)
+    with zipfile.ZipFile("data.zip", "r") as zip_ref:
+        zip_ref.extractall()
+
+# Load database.
+print("Loading database ...")
+# Presumably UTF-8 (the JSON interchange default) -- TODO confirm. Without an
+# explicit encoding, open() falls back to the platform's locale encoding.
+with open(name, "r", encoding="utf-8") as file:
+    database = json.load(file)
+
+
+def get_formations(prompt: str) -> list:
+    """Return the formations matching every word of *prompt*.
+
+    Each whitespace-separated word of *prompt* is replaced by the lexicon
+    entry with the highest Jaro-Winkler similarity, whatever that score is.
+    A formation is kept when its ``recW`` entry contains the index of every
+    selected lexicon entry.
+
+    Args:
+        prompt: Free-text query.
+
+    Returns:
+        One ``"<nm> - <last key of recS>"`` string per matching formation.
+        A blank prompt selects no lexicon entry, so every formation matches.
+
+    Raises:
+        ValueError: If *prompt* contains a word and the lexicon is empty.
+    """
+    lexicon = database["lexique"]["index"]
+    indices = []
+    for actual_word in prompt.split():
+        _, best_index = max(
+            (
+                (jaro.jaro_winkler_metric(actual_word, word), index)
+                for word, index in lexicon.items()
+            ),
+            key=lambda scored: scored[0],
+        )
+        # recW is queried with the string form of the index; convert once
+        # here rather than once per formation.
+        indices.append(str(best_index))
+    return [
+        f"{formation['nm']} - {list(formation['recS'])[-1]}"
+        for formation in database["formations"].values()
+        if all(index in formation["recW"] for index in indices)
+    ]
+
+
+if __name__ == "__main__":
+    print(get_formations(input("Votre projet > ")))
diff --git a/main5.py b/main5.py
deleted file mode 100644
index a5019b3..0000000
--- a/main5.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import jaro
-import json
-
-# Load database
-print("Loading database ...")
-with open("data.json", "r") as file:
-    database = json.loads(file.read())
-
-
-prompt = input("name > ")
-tokens = prompt.split()
-
-# Finding possible words index
-possible_words = []
-
-for t in tokens:
-    found_words = []
-    for w in database["lexique"]["index"].keys():
-        if jaro.jaro_winkler_metric(w, t) > 0.93:
-            index = database["lexique"]["index"][w]
-            if index not in found_words:
-                found_words.append(index)
-    if found_words:
-        possible_words.append(found_words)
-
-print(possible_words)
-
-# Test every formation
-to_test = []
-test_passed = []
-
-for f in database["formations"].keys():
-    checked_count = 0
-    for ti in database["formations"][f]["recW"].keys():
-        for found_index in possible_words:
-            if int(ti) in found_index:
-                checked_count += 1
-                break
-    if checked_count == len(possible_words):
-        test_passed.append(f)
-
-
-# Print the result
-for f in test_passed:
-    print(database["formations"][f]["nm"], "at :", list(database["formations"][f]["recS"].keys())[1])