11import * as fetch from '../../../lib/fetch/index.js' ;
22import * as parse from '../../../lib/parse.js' ;
33import * as transform from '../../../lib/transform.js' ;
4- import * as geography from '../../../lib/geography /index.js' ;
4+ import datetime from '../../../lib/datetime /index.js' ;
55
6- // Set county to this if you only have state data, but this isn't the entire state
7- // const UNASSIGNED = '(unassigned)';
6+ const assert = require ( 'assert' ) ;
87
98const scraper = {
109 state : 'iso2:US-AZ' ,
1110 country : 'iso1:US' ,
11+ timeseries : true ,
1212 sources : [
1313 {
1414 url : 'https://www.azdhs.gov/' ,
@@ -20,48 +20,91 @@ const scraper = {
2020 aggregate : 'county' ,
2121 scraper : {
2222 '0' : async function ( ) {
23- // Get the Tableau chart
24- const $ = await fetch . headless ( this , this . url , 'tmpindex' ) ;
23+ this . url = 'https://opendata.arcgis.com/datasets/5b34cf1637434c7bb6793580c40d1685_0.csv' ;
24+ const data = await fetch . csv ( this , this . url , 'default' , false ) ;
2525
26- // Pull out our session id from the json stuffed inside the textarea
27- const textArea = $ ( 'textarea#tsConfigContainer' ) . text ( ) ;
28- const j = JSON . parse ( textArea ) ;
29- const sessionId = j . sessionid ;
26+ // Convert T_* field to yyyy-mm-dd string
27+ // eg. T_4022020 or T_04022020 -> '2020-04-02'
/**
 * Convert a `T_*` column heading into a `yyyy-mm-dd` string.
 *
 * The digits after `T_` are normally m(m)ddyyyy (eg. T_4022020 or
 * T_04022020 -> '2020-04-02'), but some headings arrive as m/yyyy/dd
 * (eg. T_5202013 -> '2020-05-13').
 *
 * NOTE: some headings are inherently ambiguous (T_3202021 could be
 * 3/2020/21 or 3/20/2021). As before, an embedded 2020 (then 2021) wins.
 * Later years are only tried as a fallback when that reading is not a
 * valid date, so headings that already parsed keep their result.
 *
 * @param {string} s - column heading of the form `T_<7 or 8 digits>`
 * @returns {string} the date as `yyyy-mm-dd`
 * @throws {AssertionError} if the heading is malformed or does not
 *   describe a real calendar date between 2020 and the current year
 */
function parseDateField(s) {
  const FIRST_YEAR = 2020;
  const LAST_PREFERRED_YEAR = 2021;
  const currentYear = new Date().getFullYear();
  const p = n => parseInt(n, 10);

  const digits = typeof s === 'string' ? s.split('_')[1] : undefined;
  assert(
    digits !== undefined && /^\d{7,8}$/.test(digits),
    `date field ${s} must look like T_<7 or 8 digits>`
  );

  // Rewrite m/yyyy/dd as m/dd/yyyy by moving the year to the end.
  const moveYearToEnd = (text, ys) => (text.includes(ys) ? `${text.split(ys).join('')}${ys}` : text);

  const toParts = text => {
    const d = text.padStart(8, '0');
    return { d, month: d.slice(0, 2), day: d.slice(2, 4), year: d.slice(4) };
  };

  const inRange = ({ day, month, year }) =>
    p(day) >= 1 && p(day) <= 31 && p(month) >= 1 && p(month) <= 12 && p(year) >= FIRST_YEAR && p(year) <= currentYear;

  // Round-trip through Date to reject impossible dates such as 02-31.
  const isCalendarDate = ({ day, month, year }) => {
    const probe = new Date(Date.UTC(p(year), p(month) - 1, p(day)));
    return probe.getUTCFullYear() === p(year) && probe.getUTCMonth() === p(month) - 1 && probe.getUTCDate() === p(day);
  };

  // Preferred reading: the original 2020-then-2021 normalisation.
  let preferred = digits;
  for (let y = FIRST_YEAR; y <= LAST_PREFERRED_YEAR; y++) {
    preferred = moveYearToEnd(preferred, `${y}`);
  }

  // Fallback readings: a later year embedded in the middle.
  const candidates = [preferred];
  for (let y = LAST_PREFERRED_YEAR + 1; y <= currentYear; y++) {
    const ys = `${y}`;
    if (digits.includes(ys)) {
      candidates.push(moveYearToEnd(digits, ys));
    }
  }

  const parsed = candidates.map(toParts);
  // If nothing validates, report the failure against the preferred reading.
  const chosen = parsed.find(parts => inRange(parts) && isCalendarDate(parts)) || parsed[0];
  const { d, month, day, year } = chosen;

  assert(p(day) >= 1 && p(day) <= 31, `day ${day} valid for ${d}`);
  assert(p(month) >= 1 && p(month) <= 12, `month ${month} valid for ${d}`);
  assert(p(year) >= FIRST_YEAR && p(year) <= currentYear, `year ${year} valid for ${d}`);
  assert(isCalendarDate(chosen), `${year}-${month}-${day} is a real calendar date for ${d}`);

  // month and day are already two characters, year four.
  return [year, month, day].join('-');
}
3756
38- for ( const row of data ) {
39- const county = geography . addCounty ( row . County ) ;
40- const cases = parse . number ( row . Count ) ;
// Fail with a clear message instead of a TypeError on data[0] when the
// CSV has no rows.
if (!data || data.length === 0) {
  throw new Error('US/AZ: no rows in the ArcGIS CSV, cannot determine date columns');
}

// Column headings holding the per-day counts, eg. T_4022020.
const datefields = Object.keys(data[0]).filter(f => /^T_\d+/.test(f));

// One record per CSV row: name, reported total, and one `yyyy-mm-dd` key
// per non-empty date column. maxcases/maxdate track the largest per-day
// value seen, for the sanity check against the reported total.
const dataFixedHeadings = data.map(d => {
  const rec = {
    name: d.NAME,
    cases: parse.number(d.Number_Confirmed || 0),
    maxcases: 0,
    maxdate: null
  };
  for (const df of datefields) {
    // Empty cells mean "no value for this day", so no key is written.
    const c = d[df] === '' ? undefined : parse.number(d[df]);
    if (c !== undefined) rec[parseDateField(df)] = c;
    if ((c || 0) > rec.maxcases) {
      rec.maxcases = c;
      // Keeps the raw column heading, not the parsed date.
      rec.maxdate = df;
    }
  }
  return rec;
});
4176
42- counties . push ( {
43- county,
44- cases
77+ const warnings = dataFixedHeadings . filter ( d => d . maxcases > d . cases ) ;
78+ if ( warnings . length > 0 ) {
79+ console . log ( `Warning: cases potentially incorrect:` ) ;
80+ warnings . forEach ( w => {
81+ console . log ( `* ${ w . name } : ${ w . maxcases } > ${ w . cases } on ${ w . maxdate } ` ) ;
4582 } ) ;
4683 }
4784
48- counties . push ( transform . sumData ( counties ) ) ;
49- return counties ;
50- } ,
51- '2020-03-30' : async function ( ) {
52- this . url = 'https://opendata.arcgis.com/datasets/5b34cf1637434c7bb6793580c40d1685_0.csv' ;
53- const data = await fetch . csv ( this , this . url , 'default' ) ;
54- const counties = [ ] ;
85+ const scrapeDate = process . env . SCRAPE_DATE ? new Date ( `${ process . env . SCRAPE_DATE } 12:00:00` ) : new Date ( ) ;
86+ let scrapeDateString = datetime . getYYYYMMDD ( scrapeDate ) ;
87+ const dates = Object . keys ( dataFixedHeadings [ 0 ] ) . filter ( f => f . match ( / ^ \d + - \d + - \d + / ) ) ;
88+ if ( scrapeDateString < dates [ 0 ] ) {
89+ throw new Error ( `date ${ scrapeDateString } < first date of data ${ dates [ 0 ] } ` ) ;
90+ }
91+ const lastDate = dates [ dates . length - 1 ] ;
92+ if ( scrapeDateString > lastDate ) {
93+ console . log ( `US/AZ date ${ scrapeDateString } > last date ${ lastDate } , using last date.` ) ;
94+ scrapeDateString = lastDate ;
95+ }
5596
56- for ( const county of data ) {
97+ const counties = [ ] ;
98+ for ( const d of dataFixedHeadings ) {
5799 counties . push ( {
58100 // unfortunately even arcgis isnt reporting any death data
59- county : county . NAME ,
60- cases : parse . number ( county . Number_Confirmed || 0 )
101+ county : d . name ,
102+ cases : d [ scrapeDateString ]
61103 } ) ;
62104 }
63105
64106 counties . push ( transform . sumData ( counties ) ) ;
107+ // console.table(counties);
65108 return counties ;
66109 }
67110 }
0 commit comments