From 1ae2c3b1b5e2c28142adb21138aa422f7db730e9 Mon Sep 17 00:00:00 2001
From: Rapahel Lauray <r.lauray2@outlook.fr>
Date: Mon, 16 Oct 2023 00:17:27 +0200
Subject: [PATCH 1/2] recuperation data et return formation

---
 main5.py => datareader.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename main5.py => datareader.py (100%)

diff --git a/main5.py b/datareader.py
similarity index 100%
rename from main5.py
rename to datareader.py
-- 
2.43.4


From 020d4332030750b46766d913d9c0aa0a45346737 Mon Sep 17 00:00:00 2001
From: Rapahel Lauray <r.lauray2@outlook.fr>
Date: Mon, 16 Oct 2023 00:19:51 +0200
Subject: [PATCH 2/2] rectification erreur fichier

---
 datareader.py | 108 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 69 insertions(+), 39 deletions(-)

diff --git a/datareader.py b/datareader.py
index a5019b3..80c62c5 100644
--- a/datareader.py
+++ b/datareader.py
@@ -1,45 +1,75 @@
-import jaro
-import json
+import json
+import os
+import zipfile
+
+import jaro
+import requests
+
+# Resolve the name of the current dataset published on the Parcoursup website.
+# NOTE(review): the name is cut out of files.xml at fixed byte offsets, so any
+# change to that document's layout yields a wrong name; parsing the XML would
+# be sturdier, but its schema is not visible from here.
+r = requests.get("https://ressource.parcoursup.fr/data/files.xml", timeout=30)
+r.raise_for_status()  # never slice a "name" out of an HTTP error page
+name = r.content[102:135].decode()
+url = f"https://ressource.parcoursup.fr/data/{name}.zip"
+print(url)
+
+# Download the dataset if it doesn't already exist.
+# NOTE(review): assumes the archive holds a file called exactly `name` -- confirm.
+if not os.path.exists(name):
+    r = requests.get(url, timeout=30)
+    r.raise_for_status()  # fail here rather than save an error page as data.zip
+    with open('data.zip', 'wb') as file:
+        file.write(r.content)
+    with zipfile.ZipFile('data.zip', 'r') as zip_ref:
+        zip_ref.extractall()
 
 # Load database
 print("Loading database ...")
-with open("data.json", "r") as file:
+with open(name, "r", encoding="utf-8") as file:
     database = json.loads(file.read())
 
+
+def _closest_word_index(word: str):
+    """Return the lexicon index of the entry most similar to *word*.
+
+    Every word of ``database["lexique"]["index"]`` is scored against *word*
+    with the Jaro-Winkler metric; the index stored under the best-scoring
+    word is returned (the first one in dictionary order on ties).
+
+    Raises:
+        ValueError: if the lexicon is empty.
+    """
+    lexicon = database["lexique"]["index"]
+    best_word = max(
+        lexicon,
+        key=lambda candidate: jaro.jaro_winkler_metric(word, candidate),
+    )
+    return lexicon[best_word]
+
+
+def get_formations(prompt: str):
+    """Return the formations whose keywords cover every word of *prompt*.
+
+    Each whitespace-separated word of *prompt* is replaced by the index of
+    its closest lexicon entry; a formation is kept when all of those indices
+    appear among the keys of its ``recW`` mapping.
+
+    Args:
+        prompt: Free-text query. An empty or blank prompt puts no constraint
+            on the search, so every formation is returned.
+
+    Returns:
+        A list of ``"<nm> - <last key of recS>"`` strings, in database order.
+    """
+    # recW keys are strings in the JSON data, so convert each index once here.
+    indices = [str(_closest_word_index(word)) for word in prompt.split()]
+    return [
+        f"{formation['nm']} - {list(formation['recS'])[-1]}"
+        for formation in database["formations"].values()
+        if all(index in formation["recW"] for index in indices)
+    ]
+
 
-prompt = input("name > ")
-tokens = prompt.split()
-
-#  Finding possible words index
-possible_words = []
-
-for t in tokens:
-    found_words = []
-    for w in database["lexique"]["index"].keys():
-        if jaro.jaro_winkler_metric(w, t) > 0.93:
-            index = database["lexique"]["index"][w]
-            if index not in found_words:
-                found_words.append(index)
-    if found_words:
-        possible_words.append(found_words)
-
-print(possible_words)
-
-# Test every formation
-to_test = []
-test_passed = []
-
-for f in database["formations"].keys():
-    checked_count = 0
-    for ti in database["formations"][f]["recW"].keys():
-        for found_index in possible_words:
-            if int(ti) in found_index:
-                checked_count += 1
-                break
-    if checked_count == len(possible_words):
-        test_passed.append(f)
-
-
-#  Print the result
-for f in test_passed:
-    print(database["formations"][f]["nm"], "at :", list(database["formations"][f]["recS"].keys())[1])
+print(get_formations(input("Votre projet > ")))
\ No newline at end of file
-- 
2.43.4