Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
This repository was archived by the owner on May 30, 2021. It is now read-only.

Commit 65ceaf4

Browse files
authored
Switch US-AZ source to only use timeseries. (#1042)
1 parent 8a61152 commit 65ceaf4

1 file changed

Lines changed: 73 additions & 30 deletions

File tree

src/shared/scrapers/US/AZ/index.js

Lines changed: 73 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
import * as fetch from '../../../lib/fetch/index.js';
22
import * as parse from '../../../lib/parse.js';
33
import * as transform from '../../../lib/transform.js';
4-
import * as geography from '../../../lib/geography/index.js';
4+
import datetime from '../../../lib/datetime/index.js';
55

6-
// Set county to this if you only have state data, but this isn't the entire state
7-
// const UNASSIGNED = '(unassigned)';
6+
const assert = require('assert');
87

98
const scraper = {
109
state: 'iso2:US-AZ',
1110
country: 'iso1:US',
11+
timeseries: true,
1212
sources: [
1313
{
1414
url: 'https://www.azdhs.gov/',
@@ -20,48 +20,91 @@ const scraper = {
2020
aggregate: 'county',
2121
scraper: {
2222
'0': async function() {
23-
// Get the Tableau chart
24-
const $ = await fetch.headless(this, this.url, 'tmpindex');
23+
this.url = 'https://opendata.arcgis.com/datasets/5b34cf1637434c7bb6793580c40d1685_0.csv';
24+
const data = await fetch.csv(this, this.url, 'default', false);
2525

26-
// Pull out our session id from the json stuffed inside the textarea
27-
const textArea = $('textarea#tsConfigContainer').text();
28-
const j = JSON.parse(textArea);
29-
const sessionId = j.sessionid;
26+
// Convert T_* field to yyyy-mm-dd string
27+
// eg. T_4022020 or T_04022020 -> '2020-04-02'
28+
/**
 * Convert an AZ time-series column heading into a `yyyy-mm-dd` string.
 *
 * The payload after the underscore is month/day/year with an optionally
 * unpadded month, eg. T_4022020 or T_04022020 -> '2020-04-02'.
 *
 * @param {string} s - Column heading of the form `T_<digits>`.
 * @returns {string} The date formatted as `yyyy-mm-dd`.
 * @throws {AssertionError} If the heading is not `<prefix>_<1-8 digits>`, or
 *   the month, day or year is out of range, or the day does not exist in
 *   that month.
 */
function parseDateField(s) {
  assert(typeof s === 'string', `date field ${String(s)} is a string`);

  // Sometimes AZ decides to output their dates differently, eg
  // T_5122020,T_5202013 -- note the first is m/dd/yyyy, the
  // next is m/yyyy/dd. Other examples: T_3202021,T_3202022. Super.
  // Work around it by moving any embedded year token to the end.
  // NOTE: the range is deliberately left at 2020..2021; widening it makes
  // more well-formed m/dd/yyyy headings ambiguous with m/yyyy/dd ones.
  let tmp = s;
  for (let y = 2020; y <= 2021; y++) {
    const ys = `${y}`;
    if (tmp.includes(ys)) {
      tmp = `${tmp.split(ys).join('')}${ys}`;
    }
  }

  // Require a purely numeric payload of at most mmddyyyy length. parseInt()
  // alone is too lenient: it would accept '1x' as month 1 and let the
  // garbage through into the returned string.
  const digits = tmp.split('_')[1];
  assert(
    digits !== undefined && /^\d{1,8}$/.test(digits),
    `date field ${s} has the form T_<digits> with at most 8 digits`
  );

  // Left-pad so that an unpadded month (T_4022020) lines up as mmddyyyy.
  const d = digits.padStart(8, '0');
  const month = d.slice(0, 2);
  const day = d.slice(2, 4);
  const year = d.slice(4);

  const p = n => parseInt(n, 10);
  assert(p(day) >= 1 && p(day) <= 31, `day ${day} valid for ${d}`);
  assert(p(month) >= 1 && p(month) <= 12, `month ${month} valid for ${d}`);
  assert(p(year) >= 2020 && p(year) <= new Date().getFullYear(), `year ${year} valid for ${d}`);

  // Reject days that do not exist in the month (eg. Feb 31): Date.UTC rolls
  // such values over into the following month, so a round-trip mismatch
  // means the date is not real.
  const probe = new Date(Date.UTC(p(year), p(month) - 1, p(day)));
  assert(
    probe.getUTCMonth() === p(month) - 1 && probe.getUTCDate() === p(day),
    `day ${day} exists in month ${month} of ${year} for ${d}`
  );

  // All three parts are already zero-padded strings of the right width.
  return `${year}-${month}-${day}`;
}
3756

38-
for (const row of data) {
39-
const county = geography.addCounty(row.County);
40-
const cases = parse.number(row.Count);
57+
const datefields = Object.keys(data[0]).filter(f => f.match(/^T_\d+/));
58+
const dataFixedHeadings = data.map(d => {
59+
const rec = {
60+
name: d.NAME,
61+
cases: parse.number(d.Number_Confirmed || 0),
62+
maxcases: 0,
63+
maxdate: null
64+
};
65+
datefields.reduce((hsh, df) => {
66+
const c = d[df] === '' ? undefined : parse.number(d[df]);
67+
if (c !== undefined) hsh[parseDateField(df)] = c;
68+
if ((c || 0) > rec.maxcases) {
69+
rec.maxcases = c;
70+
rec.maxdate = df;
71+
}
72+
return hsh;
73+
}, rec);
74+
return rec;
75+
});
4176

42-
counties.push({
43-
county,
44-
cases
77+
const warnings = dataFixedHeadings.filter(d => d.maxcases > d.cases);
78+
if (warnings.length > 0) {
79+
console.log(`Warning: cases potentially incorrect:`);
80+
warnings.forEach(w => {
81+
console.log(`* ${w.name}: ${w.maxcases} > ${w.cases} on ${w.maxdate}`);
4582
});
4683
}
4784

48-
counties.push(transform.sumData(counties));
49-
return counties;
50-
},
51-
'2020-03-30': async function() {
52-
this.url = 'https://opendata.arcgis.com/datasets/5b34cf1637434c7bb6793580c40d1685_0.csv';
53-
const data = await fetch.csv(this, this.url, 'default');
54-
const counties = [];
85+
const scrapeDate = process.env.SCRAPE_DATE ? new Date(`${process.env.SCRAPE_DATE} 12:00:00`) : new Date();
86+
let scrapeDateString = datetime.getYYYYMMDD(scrapeDate);
87+
const dates = Object.keys(dataFixedHeadings[0]).filter(f => f.match(/^\d+-\d+-\d+/));
88+
if (scrapeDateString < dates[0]) {
89+
throw new Error(`date ${scrapeDateString} < first date of data ${dates[0]}`);
90+
}
91+
const lastDate = dates[dates.length - 1];
92+
if (scrapeDateString > lastDate) {
93+
console.log(`US/AZ date ${scrapeDateString} > last date ${lastDate}, using last date.`);
94+
scrapeDateString = lastDate;
95+
}
5596

56-
for (const county of data) {
97+
const counties = [];
98+
for (const d of dataFixedHeadings) {
5799
counties.push({
58100
// Unfortunately, even ArcGIS isn't reporting any death data
59-
county: county.NAME,
60-
cases: parse.number(county.Number_Confirmed || 0)
101+
county: d.name,
102+
cases: d[scrapeDateString]
61103
});
62104
}
63105

64106
counties.push(transform.sumData(counties));
107+
// console.table(counties);
65108
return counties;
66109
}
67110
}

0 commit comments

Comments
 (0)