DataReader #2
25
datareader.py
Normal file
25
datareader.py
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
import requests, json, jaro, zipfile, os
|
||||||
|
|
||||||
|
# Get the dataset name from the Parcoursup resource index.
# The timeout keeps the script from hanging forever on a stalled connection.
r = requests.get("https://ressource.parcoursup.fr/data/files.xml", timeout=30)
# Fail here on an HTTP error instead of slicing a name out of an error page.
r.raise_for_status()
# NOTE(review): the name is taken from a fixed byte range of the XML payload;
# this presumably breaks if the layout of files.xml changes -- confirm, and
# consider parsing the XML instead.
name = r.content[102:135].decode()
print(f"https://ressource.parcoursup.fr/data/{name}.zip")

# Download the dataset if it doesn't already exist
if not os.path.exists(name):
    r = requests.get(f"https://ressource.parcoursup.fr/data/{name}.zip", timeout=30)
    # Do not write an HTTP error body to disk as if it were the archive.
    r.raise_for_status()
    with open('data.zip', 'wb') as file:
        file.write(r.content)
    with zipfile.ZipFile('data.zip', 'r') as zip_ref:
        zip_ref.extractall()

# Load database
print("Loading database ...")
# JSON text is UTF-8; being explicit avoids depending on the platform's
# default encoding when the data contains accented characters.
with open(name, "r", encoding="utf-8") as file:
    database = json.load(file)
|
||||||
|
|
||||||
|
def get_formations(prompt: str):
    """Return the formations whose keywords cover every word of *prompt*.

    Each whitespace-separated word of the prompt is replaced by the index of
    the lexicon entry (``database["lexique"]["index"]``) that has the highest
    ``jaro.jaro_winkler_metric`` score against it. A formation is kept when
    the string form of every such index is present in its ``"recW"`` entry.

    Returns a list of strings formatted as ``"<nm> - <last key of recS>"``,
    in the iteration order of ``database["formations"]``.
    """
    matched_indices = []
    for query_word in prompt.split():
        # max() keeps the first entry among equal scores, so ties resolve in
        # lexicon iteration order.
        best_entry = max(
            database["lexique"]["index"].items(),
            key=lambda entry: jaro.jaro_winkler_metric(query_word, entry[0]),
        )
        matched_indices.append(best_entry[1])

    results = []
    for formation in database["formations"].values():
        # Skip the formation as soon as one required index is missing.
        if any(str(idx) not in formation["recW"] for idx in matched_indices):
            continue
        label = formation['nm']
        last_key = list(formation['recS'].keys())[-1]
        results.append(f"{label} - {last_key}")
    return results
|
||||||
|
|
||||||
|
# Ask the user for a project description and print the matching formations.
user_query = input("Votre projet > ")
print(get_formations(user_query))
|
45
main5.py
45
main5.py
|
@ -1,45 +0,0 @@
|
||||||
import jaro
|
|
||||||
import json
|
|
||||||
|
|
||||||
# Load database
print("Loading database ...")
# JSON text is UTF-8; being explicit avoids depending on the platform's
# default encoding when the data contains accented characters.
with open("data.json", "r", encoding="utf-8") as file:
    database = json.load(file)

prompt = input("name > ")
tokens = prompt.split()

# Finding possible words index: for every prompt token, collect the lexicon
# indices of all words whose Jaro-Winkler score against it exceeds 0.93.
possible_words = []

for t in tokens:
    found_words = []
    for w, index in database["lexique"]["index"].items():
        if jaro.jaro_winkler_metric(w, t) > 0.93 and index not in found_words:
            found_words.append(index)
    # A token with no close lexicon word is ignored rather than excluding
    # every formation.
    if found_words:
        possible_words.append(found_words)

print(possible_words)

# Test every formation: it passes when each token group has at least one of
# its indices among the formation's "recW" keys. Counting matching keys and
# comparing that count to the number of groups (the previous approach) both
# accepted formations that missed a group and rejected formations that
# matched a group more than once.
test_passed = []

for f, formation in database["formations"].items():
    keywords = formation["recW"]
    if all(any(str(index) in keywords for index in group) for group in possible_words):
        test_passed.append(f)

# Print the result
for f in test_passed:
    formation = database["formations"][f]
    # NOTE(review): index 1 raises IndexError when "recS" has fewer than two
    # keys -- confirm which key is intended here.
    print(formation["nm"], "at :", list(formation["recS"].keys())[1])
|
|
Loading…
Reference in a new issue