Récupération des données des métiers et des formations (sans documentation)

This commit is contained in:
Tipragot 2023-12-24 18:17:12 +01:00
parent 96d645ca75
commit 9354b51f24
3 changed files with 138 additions and 43 deletions

View file

@ -5,11 +5,8 @@ edition = "2021"
[dependencies]
anyhow = "1.0.76"
bytes = "1.5.0"
paste = "1.0.14"
quick-xml = { version = "0.31.0", features = ["serde", "serialize"] }
reqwest = "0.11.23"
serde = { version = "1.0.193", features = ["derive"] }
serde-xml-rs = "0.6.0"
tokio = { version = "1.35.1", features = ["full"] }
zip = "0.6.6"

View file

@ -1,51 +1,144 @@
// https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip
use paste::paste;
use quick_xml::events::Event;
use serde::de::Error;
use serde::{Deserialize, Deserializer};
use std::{
io::{BufReader, Cursor},
os::windows::fs::FileExt,
};
use std::collections::HashMap;
use std::io::{BufReader, Cursor};
use zip::ZipArchive;
macro_rules! unwrap_vec {
($name:ident, $t:ty) => {
paste! {
fn [<unwrap_ $name s>]<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Vec<$t>, D::Error> {
#[derive(Deserialize)]
struct List {
#[serde(default)]
$name: Vec<$t>,
}
Ok(List::deserialize(deserializer)?.$name)
}
}
};
async fn download_zip(url: &str) -> anyhow::Result<ZipArchive<Cursor<Vec<u8>>>> {
let response = reqwest::get(url).await?;
let response = Cursor::new(response.bytes().await?.to_vec());
Ok(zip::ZipArchive::new(response)?)
}
/// Container holding the list of [`Metier`] records produced by deserialization.
#[derive(Deserialize, Debug)]
pub struct Metiers {
// `$value` presumably makes serde collect the element's children into this
// vector — TODO confirm against the XML deserializer version in use.
#[serde(rename = "$value")]
metiers: Vec<Metier>,
}
unwrap_vec!(synonyme, String);
/// One job ("métier") record, populated by serde deserialization.
///
/// NOTE(review): this listing shows two `derive` attribute lines and two
/// `synonymes` fields, which looks like the before/after lines of a change
/// displayed together — confirm which set is current before relying on it.
#[derive(Deserialize, Debug)]
#[derive(Default, PartialEq, Debug, Deserialize)]
pub struct Metier {
/// Identifier, read from the `identifiant` element.
#[serde(rename = "identifiant")]
id: String,
/// Read from the element of the same name.
nom_metier: String,
/// Read from the element of the same name.
libelle_feminin: String,
/// Read from the element of the same name.
libelle_masculin: String,
/// Synonym list, deserialized through the `unwrap_synonymes` helper.
#[serde(deserialize_with = "unwrap_synonymes")]
synonymes: Vec<String>,
/// Read from the element of the same name as a plain string.
acces_metier: String,
/// Synonym list, deserialized through the `synonymes` helper.
#[serde(deserialize_with = "synonymes")]
pub synonymes: Vec<String>,
/// Text fragments extracted from the field's markup by `paragraph_list`.
#[serde(deserialize_with = "paragraph_list")]
competences: Vec<String>,
/// Text fragments of the `acces_metier` element, extracted by `paragraph_list`.
#[serde(rename = "acces_metier", deserialize_with = "paragraph_list")]
conditions: Vec<String>,
/// Value taken from the `niveau_acces_min` element by the `minimal_level` helper.
#[serde(rename = "niveau_acces_min", deserialize_with = "minimal_level")]
minimal_level: String,
/// Values taken from the `formations_min_requise` element by the `formations` helper.
#[serde(rename = "formations_min_requise", deserialize_with = "formations")]
formations: Vec<String>,
}
pub async fn download_metier() -> anyhow::Result<Metiers> {
let response =
reqwest::get("https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip")
.await?;
let response = Cursor::new(response.bytes().await?.to_vec());
let mut archive = zip::ZipArchive::new(response)?;
let file = archive.by_index(0)?;
let metiers = quick_xml::de::from_reader(BufReader::new(file))?;
Ok(metiers)
/// Serde `deserialize_with` helper that unwraps a container of `synonyme`
/// entries into a flat list of strings.
///
/// When the container has no `synonyme` entry, `#[serde(default)]` makes the
/// result an empty vector.
fn synonymes<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
    D: Deserializer<'de>,
{
    #[derive(Deserialize)]
    struct Synonymes {
        #[serde(default)]
        synonyme: Vec<String>,
    }

    Synonymes::deserialize(deserializer).map(|wrapper| wrapper.synonyme)
}
/// Serde `deserialize_with` helper that turns a string containing markup into
/// the list of its text fragments.
///
/// The field is first deserialized as a `String`, then re-parsed with a
/// `quick_xml::Reader` (text trimming enabled). Every text event is unescaped
/// and collected in document order; all other events are ignored.
///
/// # Errors
///
/// Any reader or unescaping failure is converted into `D::Error` via
/// `Error::custom`.
fn paragraph_list<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
    D: Deserializer<'de>,
{
    let markup = String::deserialize(deserializer)?;

    let mut parser = quick_xml::Reader::from_str(&markup);
    parser.trim_text(true);

    let mut paragraphs = Vec::new();
    loop {
        let event = parser.read_event().map_err(D::Error::custom)?;
        match event {
            Event::Eof => return Ok(paragraphs),
            Event::Text(raw) => {
                let unescaped = raw.unescape().map_err(D::Error::custom)?;
                paragraphs.push(unescaped.into_owned());
            }
            // Tags, comments, declarations, ... carry no paragraph text.
            _ => {}
        }
    }
}
/// Serde `deserialize_with` helper that extracts the `libelle` value from a
/// nested structure.
///
/// A missing `libelle` yields an empty string because of `#[serde(default)]`.
fn minimal_level<'de, D>(deserializer: D) -> Result<String, D::Error>
where
    D: Deserializer<'de>,
{
    #[derive(Deserialize)]
    struct NiveauAccesMin {
        #[serde(default)]
        libelle: String,
    }

    NiveauAccesMin::deserialize(deserializer).map(|level| level.libelle)
}
/// Serde `deserialize_with` helper that reduces a list of
/// `formation_min_requise` entries to the `id` of each entry.
///
/// The ids are returned in the order the entries were deserialized. With no
/// entry at all, `#[serde(default)]` makes the result an empty vector.
fn formations<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
    D: Deserializer<'de>,
{
    // Local shape of one entry; only its `id` is kept.
    #[derive(Deserialize)]
    struct Formation {
        id: String,
    }

    #[derive(Deserialize)]
    struct Formations {
        #[serde(default)]
        formation_min_requise: Vec<Formation>,
    }

    let entries = Formations::deserialize(deserializer)?.formation_min_requise;

    let mut ids = Vec::with_capacity(entries.len());
    for Formation { id } in entries {
        ids.push(id);
    }
    Ok(ids)
}
/// Downloads the métiers archive, deserializes the XML stored in its first
/// entry and returns the records indexed by their `id`.
///
/// If several records share the same `id`, the last one read wins.
///
/// # Errors
///
/// Propagates any failure from the download, from opening the first archive
/// entry, or from XML deserialization.
pub async fn download_metiers() -> anyhow::Result<HashMap<String, Metier>> {
    /// Root of the document: a sequence of `metier` elements.
    #[derive(Deserialize)]
    struct Metiers {
        metier: Vec<Metier>,
    }

    const URL: &str = "https://api.opendata.onisep.fr/downloads/5fe0808a2da6f/5fe0808a2da6f.zip";

    let mut zip_file = download_zip(URL).await?;
    let entry = BufReader::new(zip_file.by_index(0)?);
    let parsed: Metiers = quick_xml::de::from_reader(entry)?;

    let mut by_id = HashMap::with_capacity(parsed.metier.len());
    for record in parsed.metier {
        // The key must be cloned because the record itself is moved into the map.
        by_id.insert(record.id.clone(), record);
    }
    Ok(by_id)
}
/// One training ("formation") record, populated by serde deserialization.
#[derive(Default, PartialEq, Debug, Deserialize)]
pub struct Formation {
/// Identifier, read from the `identifiant` element.
#[serde(rename = "identifiant")]
id: String,
/// Read from the element of the same name.
libelle_complet: String,
/// Text fragments extracted by `paragraph_list`; empty when the element is absent
/// (`default`).
#[serde(default, deserialize_with = "paragraph_list")]
descriptif_format_court: Vec<String>,
/// Text fragments extracted by `paragraph_list`; empty when the element is absent
/// (`default`).
#[serde(default, deserialize_with = "paragraph_list")]
descriptif_acces: Vec<String>,
}
/// Downloads the formations archive, deserializes the XML stored in its first
/// entry and returns the records indexed by their `id`.
///
/// If several records share the same `id`, the last one read wins.
///
/// # Errors
///
/// Propagates any failure from the download, from opening the first archive
/// entry, or from XML deserialization.
pub async fn download_formations() -> anyhow::Result<HashMap<String, Formation>> {
    /// Root of the document: a sequence of `formation` elements.
    #[derive(Deserialize)]
    struct Formations {
        formation: Vec<Formation>,
    }

    const URL: &str = "https://api.opendata.onisep.fr/downloads/5fe07a9ecc960/5fe07a9ecc960.zip";

    let mut zip_file = download_zip(URL).await?;
    let entry = BufReader::new(zip_file.by_index(0)?);
    let parsed: Formations = quick_xml::de::from_reader(entry)?;

    let mut by_id = HashMap::with_capacity(parsed.formation.len());
    for record in parsed.formation {
        // The key must be cloned because the record itself is moved into the map.
        by_id.insert(record.id.clone(), record);
    }
    Ok(by_id)
}

View file

@ -1,8 +1,13 @@
use anyhow::Ok;
use backend::Metier;
/// Entry point: fetches the `metiers` and `formations` data through the
/// `backend` crate and pretty-prints each result with `{:#?}`.
///
/// Any error returned by a download is propagated out of `main` via `?`.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
// NOTE(review): `metiers` is bound twice in a row, so the first value is
// shadowed before it is ever used. This looks like the old and new call
// shown together — confirm which one is current.
let metiers = backend::download_metier().await?;
let metiers = backend::download_metiers().await?;
println!("{:#?}", metiers);
let formations = backend::download_formations().await?;
println!("{:#?}", formations);
Ok(())
}