Click to See Complete Forum and Search --> : Junk characters in form


MacPC
11-25-2008, 10:52 AM
Hi,

My site has a form. Visitors come to the form, fill it in with a bunch of random characters, and also put in links like this:
<a
href="http://iymxlzgwezch.com/">iymxlzgwezch</a>,
czznspgxcnzk (http://czznspgxcnzk.com/),
rpyvwekyligy,
http://zvggihvgkiwt.com/

I don't understand the purpose of this. Why do people do that? Is this a security issue? How can I prevent it?

thanks

javawebdog
11-25-2008, 03:20 PM
Same problem here. So I extended my form validation to screen out suspicious-looking elements by adding this JavaScript to my standard form validation:
var prohibStr="";
var prohibChars=new Array('\<a','href','\</a\>','[url','[/url]','[link','[/link','https','http','ftp','smtp','pop3','nntp','imap','irc','\#','\%','\^','\{','\}','com\?','\<','\>');

for(p in prohibChars){
//alert(prohibChars[p]);
if(document.getElementById('comments').value.lastIndexOf(prohibChars[p])!=-1){
prohibStr+='\r\n'+prohibChars[p]+'\r\n';

}
}
if(prohibStr.length>0){
alert('Your Comments contain restricted or disallowed character(s): '+prohibStr+'\r\n\r\nPlease review your comments and remove them to continue.');
return false;
}

I am also experimenting with one-dimensional semantic analysis: calculating the ratio (# of the 500 most common English words found in the message)/(# of words in the message). Not very sophisticated, but a fun exercise. It uses two pages: 1) a page where the form lives, 2) an error page shown when the 'test' is failed.

Here they are:

<html>
<head>
<META HTTP-EQUIV="Pragma" CONTENT="no-cache">
<META HTTP-EQUIV="Expires" CONTENT="-1">
<script>
// Page-level state describing the most recent CountWords() run. These stay
// global (as before) so other scripts on the page can inspect the last result.
var el="";
var char_count=0;
var fullStr="";
var initial_whitespace_rExp="";
var left_trimmedStr="";
var non_alphanumerics_rExp="";
var cleanedStr="";
var splitString="";
var word_count=0;
var wordOrWords=""; // not used by CountWords
var charOrChars=""; // not used by CountWords
var foundMatches="";

// Cache for the common-word lookup table; built on first use.
var commonWordLookup = null;

/**
 * Returns an object whose own keys are the lower-cased common English words
 * and whose values are the words as spelled in the list.
 * Built once and cached, so each message word costs a single property lookup
 * instead of a scan over the whole list.
 */
function getCommonWordLookup() {
  if (commonWordLookup !== null) {
    return commonWordLookup;
  }
  var reqdWords = [
    "the","of","to","and","a","in","is","it","you","that","he","was","for","on","are","with","as","I","his","they",
    "be","at","one","have","this","from","or","had","by","hot","but","some","what","there","we","can","out","other","were","all",
    "your","when","up","use","word","how","said","an","each","she","which","do","their","time","if","will","way","about","many","then",
    "them","would","write","like","so","these","her","long","make","thing","see","him","two","has","look","more","day","could","go","come",
    "did","my","sound","no","most","number","who","over","know","water","than","call","first","people","may","down","side","been","now","find",
    "any","new","work","part","take","get","place","made","live","where","after","back","little","only","round","man","year","came","show","every",
    "good","me","give","our","under","open","seem","together","next","white","children","begin","got","walk","example","ease","paper","often","always","music",
    "those","both","mark","book","letter","until","mile","river","car","feet","care","second","group","carry","took","rain","eat","room","friend","began",
    "idea","fish","mountain","north","once","base","hear","horse","cut","sure","watch","color","face","wood","main","enough","plain","girl","usual","young",
    "ready","above","ever","red","list","though","feel","talk","bird","soon","body","dog","family","direct","pose","leave","song","measure","state","product",
    "black","short","numeral","class","wind","question","happen","complete","ship","area","half","rock","order","fire","south","problem","piece","told","knew","pass",
    "farm","top","whole","king","size","heard","best","hour","better","true","during","hundred","am","remember","step","early","hold","west","ground","interest",
    "reach","fast","five","sing","listen","six","table","travel","less","morning","name","very","through","just","form","much","great","think","say","help",
    "low","line","before","turn","cause","same","mean","differ","move","right","boy","old","too","does","tell","sentence","set","three","want","air",
    "well","also","play","small","end","put","home","read","hand","port","large","spell","add","even","land","here","must","big","high","such",
    "follow","act","why","ask","men","change","went","light","kind","off","need","house","picture","try","us","again","animal","point","mother","world",
    "near","build","self","earth","father","head","stand","own","page","should","country","found","answer","school","grow","study","still","learn","plant","cover",
    "food","sun","four","thought","let","keep","eye","never","last","door","between","city","tree","cross","since","hard","start","might","story","saw",
    "far","sea","draw","left","late","run","while","press","close","night","real","life","few","stop","ten","simple","several","vowel","toward","war",
    "lay","against","pattern","slow","center","love","person","money","serve","appear","road","map","science","rule","govern","pull","cold","notice","voice","fall",
    "power","town","fine","certain","fly","unit","lead","cry","dark","machine","note","wait","plan","figure","star","box","noun","field","rest","correct",
    "able","pound","done","beauty","drive","stood","contain","front","teach","week","final","gave","green","oh","quick","develop","sleep","warm","free","minute",
    "strong","special","mind","behind","clear","tail","produce","fact","street","inch","lot","nothing","course","stay","wheel","full","force","blue","object","decide",
    "surface","deep","moon","island","foot","yet","busy","test","record","boat","common","gold","possible","plane","age","dry","wonder","laugh","thousand","ago",
    "ran","check","game","shape","yes","hot","miss","brought","heat","snow","bed","bring","sit","perhaps","fill","east","weight","language","among"
  ];
  var lookup = {};
  for (var w = 0; w < reqdWords.length; w++) {
    // Lower-cased key ("I" -> "i") so matching is case-insensitive; the
    // duplicated "hot" entry collapses into one key and is counted once.
    lookup[reqdWords[w].toLowerCase()] = reqdWords[w];
  }
  commonWordLookup = lookup;
  return lookup;
}

/**
 * Crude spam heuristic for a form field: computes the share of the field's
 * words that appear in the common-English-word list. When fewer than 40% do
 * (and the field is not the "Add comments here" placeholder), the form named
 * `testform` is re-pointed at message_error.html using GET.
 *
 * Side effects: refreshes the page-level globals declared above
 * (word_count, foundMatches, ...) on every call.
 *
 * @param {string} this_field id of the text field to analyse
 * @returns {boolean} always true, so the submit proceeds to whichever action
 *                    the form now has
 */
function CountWords(this_field){
  var lookup = getCommonWordLookup();
  var token;
  var ratio;

  el = document.getElementById(this_field);
  char_count = el.value.length; // very crude measure
  fullStr = el.value + " "; // trailing delimiter so the last word is terminated
  // Remove (not replace with a space) any leading non-alphanumeric run;
  // leaving a space there produced an empty first token and over-counted by one.
  initial_whitespace_rExp = /^[^A-Za-z0-9]+/;
  left_trimmedStr = fullStr.replace(initial_whitespace_rExp, "");
  // Collapse every other run of non-alphanumerics into a single space.
  non_alphanumerics_rExp = /[^A-Za-z0-9]+/g;
  cleanedStr = left_trimmedStr.replace(non_alphanumerics_rExp, " ");
  splitString = cleanedStr.split(" ");
  // The trailing delimiter always yields one empty final token.
  word_count = splitString.length - 1;

  // Start from a clean list on every call; the form can be submitted more than
  // once without a page reload, and stale matches corrupted later results.
  foundMatches = [];
  for (var i = 0; i < word_count; i++) {
    token = splitString[i].toLowerCase();
    // hasOwnProperty keeps inherited names such as "constructor" from matching.
    if (Object.prototype.hasOwnProperty.call(lookup, token)) {
      foundMatches.push(lookup[token]);
    }
  }

  // A field with no words scores 0, i.e. it is treated like a failed message
  // rather than producing NaN from 0/0.
  ratio = word_count > 0 ? foundMatches.length / word_count : 0;

  //example of what action to take
  if (ratio < 0.4 && el.value !== "Add comments here") {
    document.testform.method = "get";
    document.testform.action = "message_error.html";
  }
  return true;
}
// Pre-fill the textarea from the query string (used when the error page sends
// the visitor back here with the original message attached).
var query = window.location.search;
// When a query string is present, keep only the text after the last '='
// (i.e. the value of the final parameter).
if (query.substring(0, 1) === '?') {
  query = query.substring(query.lastIndexOf('=') + 1);
}
// Form encoding turns spaces into '+'; turn them back.
var data = query.replace(/\+/g, " ");
// This script sits in <head>, so the textarea has not been parsed yet and
// getElementById returns null; guard instead of throwing on `.value`.
var prefillTarget = document.getElementById('w_in');
if (prefillTarget) {
  prefillTarget.value = data;
}
</script>
<style>
/* Page canvas: shifted 50px from the left edge, with a single
   non-repeating background image offset by the same 50px. */
body {
  margin-left: 50px;
  background-image: url('index_bg_h585xw577.png');
  background-repeat: no-repeat;
  background-position: 50px 0px;
  border: 0px solid red;
  height: 590px;
}

/* Indent the form inside the canvas. */
form {
  padding-left: 50px;
}

/* Header image gets a smaller indent than the form. */
#logo {
  padding-left: 25px;
}
</style>
</head>

<body>

<div>
<br />
<img id="logo" src='cose_seal_header-image.gif'><br /><br />
<form action="javascript:alert('would have sent')" id="testform" name="testform" method="post" onSubmit="return CountWords('w_in')">
<textarea cols=56 rows=10 name="w_in" id="w_in"></textarea><br />
<input type="submit" value="Test">
<input type="button" value="Clear All" onclick="window.location.href='checkFiveHundred.html'">
</form>
</div>


</body>

<script>
// Pre-fill the textarea from the query string. This copy runs after the form
// markup, so the 'w_in' element exists by now.
var query = window.location.search;
// When a query string is present, keep only the text after the last '='.
if (query.substring(0, 1) === '?') {
  query = query.substring(query.lastIndexOf('=') + 1);
}
// Form encoding turns spaces into '+'; turn them back before decoding.
var data = query.replace(/\+/g, " ");
if (query.length > 0) {
  var decodedComment;
  try {
    // decodeURIComponent understands UTF-8 percent-encoding; the deprecated
    // unescape() decodes each byte separately and garbles non-ASCII text.
    decodedComment = decodeURIComponent(data);
  } catch (e) {
    // Malformed or non-UTF-8 sequences make decodeURIComponent throw;
    // fall back to the previous behaviour rather than leaving the box empty.
    decodedComment = unescape(data);
  }
  document.getElementById('w_in').value = decodedComment;
}
</script>
</html>

and

<html>
<head>
<script>
// Raw query string handed over by the form page; kept (still encoded) in
// `query` so gotoForm() can pass it back unchanged.
var query = window.location.search;
// Skip the leading ?, which should always be there,
// but be careful anyway
if (query.substring(0, 1) === '?') {
  query = query.substring(1);
}
// Form encoding separates words with '+', so this yields one entry per word.
// (A former loop that assigned each entry to itself was removed: it changed
// nothing and only leaked an implicit global `i`.)
var data = query.split('+');

/**
 * Navigates back to the form page, appending the page-level `query` string
 * so the form can restore the visitor's message.
 */
function gotoForm(){
  var formUrl = "checkFiveHundred.html?";
  formUrl += query;
  location.href = formUrl;
}
</script>
<style>
/* Error text: small, bold and red. */
p {
  font-size: 10pt;
  color: red;
  font-weight: bold;
}

/* Section heading in the same warning colour. */
h4 {
  color: red;
  font-size: 12pt;
}
</style>
</head>

<body>

<dir>
<script>
// Renders the rejected message back to the visitor, followed by the
// explanation and a button that returns to the form.

/**
 * Escapes the characters that are significant in HTML so that text taken
 * from the URL is displayed literally instead of being parsed as markup.
 */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

document.write("<h4>Your Comments:</h4><br />");
for (var i = 0; (i < data.length); i++) {
  // Each entry is one '+'-separated word; re-add the space between words.
  var rawStr = data[i] + " ";
  // Drop the "name=" prefix carried by the first entry (other entries have
  // no '=' so lastIndexOf yields -1 and the whole string is kept).
  rawStr = rawStr.substring(rawStr.lastIndexOf('=') + 1);
  try {
    // UTF-8 aware decoding; unescape() garbles non-ASCII characters.
    rawStr = decodeURIComponent(rawStr);
  } catch (e) {
    // Malformed sequences throw; fall back to the legacy decoder.
    rawStr = unescape(rawStr);
  }
  // The text comes straight from the URL, so it must be escaped before being
  // written into the page; writing it raw allowed script injection via a link.
  document.write(escapeHtml(rawStr));
}
document.write("<hr>");
document.write("<br /><p>Does not contain a sufficient number of the 499 most common words in English. <br />As a precaution this message has not been sent.<br />");
document.write("Click here to return to the form.</p><p><input value='Return to form' type='button' onclick='gotoForm()'></p>");
</script>
</dir>
</body>
</html>

I will leave the weightier question of motives and security to wiser heads.

Ultimately, though, tests/validations like these should ideally be run on the server side, too (in fact I am working on a version in PHP ... not ready for prime time, though). I have frequently heard suggestions to use CAPTCHA (http://en.wikipedia.org/wiki/Captcha) (Completely Automated Public Turing test to tell Computers and Humans Apart), but these interrupt the user's flow and make them react to some outside criterion, which is not in and of itself bad, but it seemed more fun to try to figure out a way to do it myself. Sort of a homemade SpamDetector/Terminator.