Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 48568af

Browse files
committed
added SAS language (see issue #199)
1 parent 4c7bedc commit 48568af

File tree

2 files changed

+271
-0
lines changed

2 files changed

+271
-0
lines changed

lib/coderay/helpers/file_type.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def type_from_shebang filename
119119
'rpdf' => :ruby,
120120
'ru' => :ruby, # config.ru
121121
'rxml' => :ruby,
122+
'sas' => :sas,
122123
'sass' => :sass,
123124
'sql' => :sql,
124125
'taskpaper' => :taskpaper,

lib/coderay/scanners/sas.rb

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
module CodeRay
2+
module Scanners
3+
4+
class SAS < Scanner
5+
6+
# Make this scanner available under the :sas language key and
# associate it with the "sas" file extension.
register_for :sas
file_extension 'sas'

# Token kinds that are not considered to be running code.
KINDS_NOT_LOC = [:comment] # :nodoc:
14+
15+
# See the WordList documentation.
16+
#CONSTANTS = %w( true false null )
17+
#IDENT_KIND = WordList.new(:key).add(CONSTANTS, :value)
18+
19+
#ESCAPE = / [bfnrt\\"\/] /x
20+
#UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x
21+
22+
# Word lists used to classify SAS identifiers.
#
# Every list is frozen and free of duplicate entries so it can be shared
# safely and fed into a lookup table without redundant work.
module Words # :nodoc:

  # Automatic variables / special dataset names.
  CONSTANT = %w[_n_ _null_].freeze

  # Macro keywords that open and close a macro definition.
  MACROBOUND = %w[%macro %mend].freeze

  # Macro language statements and macro functions (with their leading %).
  MACROSTATEMENT = %w[%abort %display %do %else %end %for %global %if %include %macro %mend
    %nrstr %put %quote %str %sysfunc %then %to %unquote %until %while %window].freeze

  # for a complete list, see http://support.sas.com/documentation/cdl/en/allprodsproc/63875/HTML/default/viewer.htm#a003135046.htm
  # for extras included here (run, quit, etc.), see https://gist.github.com/cjdinger/7cf251399ef29b9b90b324a6fc442fca
  PROCNAME = %w[access aceclus allele anom anova append appsrv arima autoreg bmdp bom
    boxplot btl build calendar calis callrfc cancorr candisc capability
    casecontrol catalog catmod cdisc chart cimport clp cluster compare
    compile computab contents convert copula copy corr corresp countreg cpm
    cport cusum cv2view data datasets datasource db2ext db2util dbcstab dbf
    dbload define_event define_tagset dif discrim display distance docparse
    document download dqmatch dqscheme dqsrvadm dqsrvsvc dtree entropy esm
    expand explode export factex factor family fastclus fcmp fmm fontreg
    forecast format forms freq fsbrowse fsedit fsletter fslist fsview g3d
    g3grid ga gam ganno gantt gareabar gbarline gchart gcontour gdevice
    geneselect genmod geocode gfont gimport ginside gis gkeymap gkpi
    glimmix glm glmmod glmpower glmselect gmap goptions gplot gproject gradar
    greduce gremove greplay groovy gslide gtestit gtile haplotype hpcountreg
    hpdmdb hpds2 hpf hpfarimaspec hpfdiagnose hpfengine hpfesmspec hpfevents
    hpfexmspec hpfidmspec hpforest hpfreconcile hpfselect hpfucmspec hplmixed
    hplogistic hpmixed hpneural hpnlin hpreduce hpreg hpsample hpseverity
    hpsummary htsnp http iml import inbreed infomaps intpoint ishikawa items
    javainfo kde krige2d lattice lifereg lifetest loan loess logistic lp
    macontrol macro mapimport mcmc mdc mddb mds means mend metadata metalib
    metaoperate mi mianalyze migrate mixed modeclus model multtest nested
    netdraw netflow nlin nlmixed nlp npar1way olap olapoperate operate
    optex options optload optlp optmilp optmodel optqp optsave orthoreg
    panel pareto pdlreg pds pdscopy phreg plan plm plot pls pm pmenu power
    princomp prinqual print printto probit proc proto prtdef prtexp psmooth
    pwencode qdevice qlim quantreg quest quit rank rdc rdpool rdsec reg
    registry release reliability report risk robustreg rsreg run scaproc
    score seqdesign seqtest server severity sgdesign sgpanel sgplot sgrender
    sgscatter shewhart sim2d similarity simlin simnormal soap sort source
    spectra sql standard statespace statgraph stdize stepdisc stp summary
    surveyfreq surveylogistic surveymeans surveyphreg surveyreg surveyselect
    syslin tabulate tapecopy tapelabel tcalis template timeid timeplot
    timeseries tpspline trans transpose transreg trantab tree tscsreg
    tspl ttest ucm univariate upload userproc varclus varcomp variogram
    varmax vaxtointeg webmddb x11 x12 xsl].freeze

  # Global and step-level statement names.
  STATEMENT = %w[abort array attrib axis by class endrsubmit file filename footnote format freq goptions
    infile informat killtask legend libname listtask model note ods options pattern rdisplay
    rget rsubmit select signoff signon symbol sysecho systask table title waitfor where
    weight xaxis yaxis xaxis2 yaxis2].freeze

  # General keywords (duplicates present in the original list were removed).
  KEYWORD = %w[_all_ add alter array attrib axis bandplot barchart barchartparm
    bihistogram3dparm blockplot boxplot boxplotparm break by cards cards4
    class clear close column columns compute continuouslegend contourplotparm
    data declare define densityplot describe disconnect discretelegend
    distinct dm drop dropline dynamic edit ellipse ellipseparm
    else endcomp entry entryfootnote entrytitle excel execute filename
    footnote format freq fringeplot from goptions graphics group
    histogram histogramparm html html5 id if informat input insert
    into keep killtask lineparm listing listtask loessplot merge
    model modelband needleplot nloptions ods options parmcards
    parmcards4 pbsplineplot powerpoint proc put rand ranks rbreak
    rdisplay referenceline regressionplot replace reset retain rget rtf run
    scatterplot scatterplotmatrix seriesplot set signoff signon stepplot
    style surfaceplotparm symbol sysecho table tables tagsets then title
    unique update validate value var vectorplot waitfor weight where].freeze

  # Built-in function and call-routine names.
  FUNCTION = %w[abs addr addrlong airy allcomb allcombi allperm anyalnum anyalpha
    anycntrl anydigit anyfirst anygraph anylower anyname anyprint anypunct
    anyspace anyupper anyxdigit arcos arcosh armend armgtid arminit armjoin
    armproc armstop armstrt armupdt arsin arsinh artanh ascebc atan atan2
    attrc attrn band beta betainv blackclprc blackptprc blkshclprc blshift
    bnot bor bquote brshift bxor byte cat catq cats catt catx cdf ceil
    ceilz cexist char choosec choosen cinv close cmiss cmpres cnonct
    coalesce coalescec collate comb compare compbl compcost compged complev
    compound compress compstor constant convx convxp cos cosh count countc
    countw css curobs cv daccdb daccdbsl daccsl daccsyd dacctab dairy datatyp
    datdif date datejul datepart datetime day dclose dcreate delete depdb
    depdbsl depsl depsyd deptab dequote deviance dhms dif digamma dim dinfo
    divide dnum dopen doptname doptnum dqcase dqgender dqgenderinfoget
    dqgenderparsed dqidentify dqlocaleguess dqlocaleinfoget dqlocaleinfolist
    dqmatch dqmatchinfoget dqmatchparsed dqparse dqparseinfoget dqparsetokenget
    dqparsetokenput dqpattern dqschemeapply dqsrvarchjob dqsrvcopylog
    dqsrvdeletelog dqsrvjobstatus dqsrvkilljob dqsrvprofjobfile dqsrvprofjobrep
    dqsrvuser dqstandardize dqtoken dread dropnote dsname dur durp ebcasc
    effrate envlen erf erfc euclid eval exist exp fact fappend fclose fcol
    fdelete fetch fetchobs fexist fget fileattr fileexist filename fileref
    finance find findc findfile findw finfo finv fipname fipnamel fipstate
    first floor floorz fnonct fnote fopen foptname foptnum fpoint fpos fput
    fread frewind frlen fsep fuzz fwrite gaminv gamma garkhclprc garkhptprc
    gcd geodist geomean geomeanz getdvi getjpi getlog getmsg getoption getquota
    getsym getterm getvarc getvarn graycode grdsvc_enable grdsvc_getaddr
    grdsvc_getinfo grdsvc_getname grdsvc_nnodes harmean harmeanz hbound hms
    holiday hour htmldecode htmlencode ibessel ifc ifn index indexc indexw
    input inputc inputn int intcindex intck intcycle intfit intfmt intget
    intindex intnx intrr intseas intshift inttest intz invcdf iorcmsg iqr irr
    isnull jbessel juldate juldate7 kurtosis label lag largest lbound lcm lcomb
    left length lengthc lengthm lengthn lexcomb lexcombi lexperk lexperm lfact
    lgamma libname libref limmoment log log10 log1px log2 logbeta logcdf logistic
    logpdf logsdf lowcase lperm lpnorm mad margrclprc margrptprc max md5 mdy mean
    median min minute missing mod module modulec modulen modz month mopen
    mort msplint mvalid n netpv nliteral nmiss nodename nomrate notalnum notalpha
    notcntrl notdigit note notfirst notgraph notlower notname notprint notpunct
    notspace notupper notxdigit npv nrbquote nrquote nrstr nvalid nwkdom open
    ordinal pathname pctl pdf peek peekc peekclong peeklong perm point poisson
    poke pokelong probbeta probbnml probbnrm probchi probf probgam probhypr
    probit probmc probnegb probnorm probt propcase prxchange prxdebug prxfree
    prxmatch prxnext prxparen prxparse prxposn prxsubstr ptrlongadd put putc
    putlog putn putsym pvp qcmpres qleft qlowcase qscan qsubstr qsysfunc qtr
    qtrim quantile quote qupcase ranbin rancau rand ranexp rangam range rank
    rannor ranperk ranperm ranpoi rantbl rantri ranuni read_array rename repeat
    resolve reverse rewind right rms round rounde roundz run_macro run_sasfile
    saving savings scan scanq sdf second set setterm sign sin sinh skewness sleep
    smallest soapweb soapwebmeta soapwipservice soapwipsrs soapws soapwsmeta
    softmax solve sortc sortn soundex spedis sqrt squantile std stderr stdize
    stfips stname stnamel str streaminit strip subpad substr substrn sum sumabs
    superq symexist symget symglobl symlocal symput symputx sysevalf sysexist
    sysfunc sysget sysmsg sysparm sysprocessid sysprocessname sysprod sysrc
    system tan tanh termin termout time timepart timevalue tinv tnonct today
    translate transtrn tranwrd trigamma trim trimn trunc ttclose ttcontrl
    ttopen ttread ttwrite uniform unquote upcase urldecode urlencode uss
    uuidgen var varfmt varinfmt varlabel varlen varname varnum varray varrayx
    vartype verify vformat vformatd vformatdx vformatn vformatnx vformatw
    vformatwx vformatx vinarray vinarrayx vinformat vinformatd vinformatdx
    vinformatn vinformatnx vinformatw vinformatwx vinformatx vlabel vlabelx
    vlength vlengthx vms vname vnamex vnext vtype vtypex vvalue vvaluex week
    weekday whichc whichn write_array year yieldp yrdif yyq zipcity zipcitydistance
    zipfips zipname zipnamel zipstate].freeze

end
152+
153+
# Tokenizes the SAS source held by this scanner and reports every token
# to +encoder+.
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: accepted for interface compatibility; not used by this scanner.
#
# Returns +encoder+.
#
# NOTE(review): the kinds :dataline, :datetime, :number and :punctuation are
# kept from the original implementation -- confirm that they are registered
# in CodeRay's token kind table, otherwise they may render unstyled.
def scan_tokens encoder, options

  # The scanner is always in a certain state, which is :initial by default.
  state = :initial

  # True at the beginning of the input and right after a ";". Statement
  # comments ("* ...;") and DATALINES blocks are only valid there, which
  # lets us tell a comment apart from the multiplication operator.
  statement_start = true

  # Patterns for the body of the string currently being scanned; they
  # depend on which quote character opened the string.
  string_content = string_escape = nil

  ident_kinds = sas_ident_kinds

  until eos?

    case state

    when :initial

      if match = scan(/\s+/)
        # White space never changes whether we are at a statement start.
        encoder.text_token match, :space

      elsif match = scan(%r{ /\* (?: .*? \*/ | .* ) }mx)
        # Block comment; an unterminated one runs to the end of the input.
        encoder.text_token match, :comment

      elsif statement_start && (match = scan(/%?\*[^;]*;?/))
        # Statement comment ("* text;") or macro comment ("%* text;").
        # It ends at the first semicolon, so we remain at a statement start.
        encoder.text_token match, :comment

      elsif statement_start && (match = scan(/ (?:datalines|lines|cards) (?: 4\s*; .*? (?: ^\s*;;;; | \z ) | \s*; .*? (?: ^\s*; | \z ) ) /imx))
        # DATALINES/CARDS/LINES: raw data up to a line starting with ";"
        # (";;;;" for the "4" variants). The block ends a statement.
        encoder.text_token match, :dataline

      else
        statement_start = false

        # dates/datetimes are LIKE strings, but not quite:
        # "1Jan2016"d, '3:14:15pm't, '31jul2001:9:27:05am'dt
        if match = scan(/ (?: '[^'\n]+' | "[^"\n]+" ) (?: dt? | t ) \b /ix)
          encoder.text_token match, :datetime

        elsif match = scan(/ \*\*|\|\||!!|¦¦|<>|><|[~¬^<>]?=|[*\/+\-<>&\|!¦~¬^]|\b(?:eq|ne|gt|lt|ge|le|in|not|and|or)\b /ix)
          encoder.text_token match, :operator

        elsif match = scan(/%?[a-z_]\w*/i)
          # Identifier or macro word; the word list decides its kind.
          encoder.text_token match, ident_kinds[match]

        elsif match = scan(/ \d[\da-f]*x\b | \d+(?:\.\d+)?(?:e[+-]?\d+)? /ix)
          # Hexadecimal (0c1x, must start with a digit) or decimal (1.2e23).
          encoder.text_token match, :number

        elsif match = scan(/["']/)
          # A string starts here; only the same quote character may end it.
          if match == '"'
            string_content = /[^"]+/
            string_escape = /""/
          else
            string_content = /[^']+/
            string_escape = /''/
          end
          state = :string
          # This opens a token group and encodes the delimiter token.
          encoder.begin_group state
          encoder.text_token match, :delimiter

        elsif match = scan(/[$%@.(){}\[\];,\\]/)
          # A semicolon terminates the statement.
          statement_start = (match == ';')
          encoder.text_token match, :punctuation

        else
          # If we reach invalid code, we discard chars one by one
          # and mark them as :error.
          encoder.text_token getch, :error
        end
      end

    when :string

      if match = scan(string_content)
        # Anything but the delimiter is content (new lines included).
        encoder.text_token match, :content

      elsif match = scan(string_escape)
        # SAS escapes a quote inside a string by doubling it.
        encoder.text_token match, :char

      else
        # Only the closing delimiter can be left at this position.
        encoder.text_token getch, :delimiter
        # Always close token groups using the right token kind.
        encoder.end_group state
        state = :initial
      end

    else
      # Nice for debugging. Should never happen.
      raise_inspect 'Unknown state: %p' % [state], encoder

    end
  end

  # If a string is still open at the end of the input, close its group.
  encoder.end_group state if state == :string

  encoder
end

# Case-insensitive lookup from a word to its token kind, built once per
# scanner instance. Lists added later take precedence, so a word that is
# both a function and a keyword is reported as a keyword. Unknown words
# map to :ident.
def sas_ident_kinds
  @sas_ident_kinds ||= WordList::CaseIgnoring.new(:ident).
    add(Words::FUNCTION, :predefined).
    add(Words::PROCNAME, :predefined).
    add(Words::STATEMENT, :keyword).
    add(Words::KEYWORD, :keyword).
    add(Words::CONSTANT, :predefined_constant).
    add(Words::MACROSTATEMENT, :preprocessor)
end
private :sas_ident_kinds
266+
267+
end
268+
269+
end
270+
end

0 commit comments

Comments
 (0)