-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnormaliza_demografia_municipios.py
More file actions
69 lines (63 loc) · 2.5 KB
/
normaliza_demografia_municipios.py
File metadata and controls
69 lines (63 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import csv
import re
import glob
# Folder scanned for the semicolon-delimited input CSVs (file names such as 0_4.csv).
INPUT_DIR = "../downloads/poblacion_censada_csvs"
# Path of the single consolidated CSV produced by this script.
OUTPUT_CSV = "../downloads/normalizacion/municipality_demographics.csv"

# Create the destination folder up front so the final write cannot fail on a
# missing directory; exist_ok makes repeated runs harmless.
_output_dir = os.path.dirname(OUTPUT_CSV)
os.makedirs(_output_dir, exist_ok=True)
def filename_to_range(filename):
    """Derive the range label from a CSV file name.

    The directory part and the extension are discarded, and every underscore
    in the remaining stem is turned into a hyphen, e.g. ``0_4.csv`` -> ``0-4``.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    return '-'.join(stem.split('_'))
def clean_total(val):
    """Convert a Spanish-formatted number string to an integer.

    Dots are treated as thousands separators and removed; everything from the
    first comma onwards (the decimal part) is discarded, so ``'1.039,00'``
    becomes ``1039``.

    Args:
        val: The raw cell text, or ``None`` when the cell is missing.

    Returns:
        The integer value, or ``None`` when ``val`` is ``None`` or does not
        contain a parseable integer (e.g. an empty cell or a placeholder
        such as ``'..'``).
    """
    # A missing cell is reported the same way as an unparseable one instead
    # of blowing up with AttributeError on ``None.replace``.
    if val is None:
        return None
    integer_part = val.replace('.', '').split(',', 1)[0]
    try:
        return int(integer_part)
    except ValueError:
        return None
def _find_year_header(table):
    """Return the first row whose first cell starts with 'Serie', or None."""
    return next(
        (row for row in table if row and row[0].startswith('Serie')),
        None,
    )


def _municipality_records(table, years, range_val):
    """Yield one output record per (municipality row, year) pair."""
    for row in table:
        if not row or not row[0].startswith('Municipios'):
            continue
        if len(row) < 3:
            # Malformed row lacking the id/name columns: skip it rather than
            # abort the whole run with an IndexError.
            continue
        id_secondary = row[1].strip()
        name = row[2].strip()
        # NOTE: values are assumed to start at column 3, matching the layout
        # Municipios;id;name;val1;val2;... and the header Serie;;;2021;2022;...
        for column, year in enumerate(years, start=3):
            # A row shorter than the header simply has no value for that year.
            total = clean_total(row[column].strip()) if column < len(row) else None
            yield {
                'id_secondary_municipality': id_secondary,
                'name': name,
                'total': total,
                'year': year,
                'range': range_val,
            }


def main():
    """Merge every input CSV into one normalized municipality table.

    Each ``*.csv`` file in ``INPUT_DIR`` is read as a semicolon-delimited
    table. The row starting with ``Serie`` supplies the year labels, and each
    row starting with ``Municipios`` contributes one record per year. Files
    without a ``Serie`` row are skipped. The collected records are written to
    ``OUTPUT_CSV`` with the columns ``id_secondary_municipality``, ``name``,
    ``total``, ``year`` and ``range``.
    """
    rows = []
    # glob returns paths in arbitrary order; sorting keeps the output
    # reproducible from one run to the next.
    for filepath in sorted(glob.glob(os.path.join(INPUT_DIR, '*.csv'))):
        range_val = filename_to_range(filepath)
        # newline='' is what the csv module asks for; utf-8-sig drops a
        # leading BOM (if any) so it cannot hide the 'Serie' marker.
        with open(filepath, newline='', encoding='utf-8-sig') as f:
            table = list(csv.reader(f, delimiter=';'))

        # Header with the years (e.g. Serie;;;2021;2022;2023;2024)
        year_row = _find_year_header(table)
        if year_row is None:
            # No year header means the values cannot be labelled; skip the file.
            continue
        years = [cell for cell in year_row if re.match(r'\d{4}', cell)]
        rows.extend(_municipality_records(table, years, range_val))

    with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(
            f,
            fieldnames=['id_secondary_municipality', 'name', 'total', 'year', 'range'],
        )
        writer.writeheader()
        writer.writerows(rows)
    print(f"Archivo generado: {OUTPUT_CSV}")
# Run the normalization only when this file is executed as a script, so that
# importing it does not trigger the conversion.
if __name__ == "__main__":
    main()